import.go (view raw)
1//
2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
3//
4// Permission to use, copy, modify, and distribute this software for any
5// purpose with or without fee is hereby granted, provided that the above
6// copyright notice and this permission notice appear in all copies.
7//
8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
16package main
17
18import (
19 "archive/zip"
20 "encoding/csv"
21 "encoding/json"
22 "fmt"
23 "html"
24 "io/ioutil"
25 "log"
26 "os"
27 "regexp"
28 "sort"
29 "strings"
30 "time"
31
32 "humungus.tedunangst.com/r/webs/junk"
33)
34
35func importMain(username, flavor, source string) {
36 switch flavor {
37 case "mastodon":
38 importMastodon(username, source)
39 case "honk":
40 importHonk(username, source)
41 case "twitter":
42 importTwitter(username, source)
43 case "instagram":
44 importInstagram(username, source)
45 default:
46 elog.Fatal("unknown source flavor")
47 }
48}
49
// ActivityObject is the subset of an ActivityPub object that the
// importers care about: authorship, content, threading information,
// hashtags, and media attachments.
type ActivityObject struct {
	AttributedTo string
	Summary      string
	Content      string
	InReplyTo    string
	Conversation string
	Context      string
	Published    time.Time
	// Tag holds hashtags and similar markers; only Type == "Hashtag"
	// entries are used by the import code below.
	Tag []struct {
		Type string
		Name string
	}
	// Attachment holds media; Url is relative to the export archive
	// for mastodon exports.
	Attachment []struct {
		Type      string
		MediaType string
		Url       string
		Name      string
	}
}
69
// PlainActivityObject is ActivityObject stripped of its methods, so the
// custom unmarshaler below can decode into it without recursing into itself.
type PlainActivityObject ActivityObject

// UnmarshalJSON decodes b into obj on a best-effort basis: the decode
// error is deliberately discarded so one malformed object (presumably
// e.g. an object that is just a string id, or an oddly formatted field)
// doesn't abort an entire import.  NOTE(review): always returns nil by
// design — callers cannot detect a partial decode.
func (obj *ActivityObject) UnmarshalJSON(b []byte) error {
	p := (*PlainActivityObject)(obj)
	json.Unmarshal(b, p)
	return nil
}
77
78func importMastodon(username, source string) {
79 user, err := butwhatabout(username)
80 if err != nil {
81 elog.Fatal(err)
82 }
83
84 outbox := source + "/outbox.json"
85 if _, err := os.Stat(outbox); err == nil {
86 importActivities(user, outbox, source)
87 } else {
88 ilog.Printf("skipping outbox.json!")
89 }
90 if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
91 importMastotooters(user, source)
92 } else {
93 ilog.Printf("skipping following_accounts.csv!")
94 }
95}
96
97func importHonk(username, source string) {
98 user, err := butwhatabout(username)
99 if err != nil {
100 elog.Fatal(err)
101 }
102
103 outbox := source + "/outbox.json"
104 if _, err := os.Stat(outbox); err == nil {
105 importActivities(user, outbox, source)
106 } else {
107 ilog.Printf("skipping outbox.json!")
108 }
109}
110
111func importActivities(user *WhatAbout, filename, source string) {
112 type Activity struct {
113 Id string
114 Type string
115 To interface{}
116 Cc []string
117 Object ActivityObject
118 }
119 var outbox struct {
120 OrderedItems []Activity
121 }
122 ilog.Println("Importing honks...")
123 fd, err := os.Open(filename)
124 if err != nil {
125 elog.Fatal(err)
126 }
127 dec := json.NewDecoder(fd)
128 err = dec.Decode(&outbox)
129 if err != nil {
130 elog.Fatalf("error parsing json: %s", err)
131 }
132 fd.Close()
133
134 havetoot := func(xid string) bool {
135 var id int64
136 row := stmtFindXonk.QueryRow(user.ID, xid)
137 err := row.Scan(&id)
138 if err == nil {
139 return true
140 }
141 return false
142 }
143
144 re_tootid := regexp.MustCompile("[^/]+$")
145 items := outbox.OrderedItems
146 for i, j := 0, len(items)-1; i < j; i, j = i+1, j-1 {
147 items[i], items[j] = items[j], items[i]
148 }
149 for _, item := range items {
150 toot := item
151 if toot.Type != "Create" {
152 continue
153 }
154 if strings.HasSuffix(toot.Id, "/activity") {
155 toot.Id = strings.TrimSuffix(toot.Id, "/activity")
156 }
157 tootid := re_tootid.FindString(toot.Id)
158 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
159 if havetoot(xid) {
160 continue
161 }
162
163 convoy := toot.Object.Context
164 if convoy == "" {
165 convoy = toot.Object.Conversation
166 }
167 var audience []string
168 to, ok := toot.To.(string)
169 if ok {
170 audience = append(audience, to)
171 } else {
172 for _, t := range toot.To.([]interface{}) {
173 audience = append(audience, t.(string))
174 }
175 }
176 audience = append(audience, toot.Cc...)
177 honk := Honk{
178 UserID: user.ID,
179 What: "honk",
180 Honker: user.URL,
181 XID: xid,
182 RID: toot.Object.InReplyTo,
183 Date: toot.Object.Published,
184 URL: xid,
185 Audience: audience,
186 Noise: toot.Object.Content,
187 Convoy: convoy,
188 Whofore: 2,
189 Format: "html",
190 Precis: toot.Object.Summary,
191 }
192 if !loudandproud(honk.Audience) {
193 honk.Whofore = 3
194 }
195 for _, att := range toot.Object.Attachment {
196 switch att.Type {
197 case "Document":
198 fname := fmt.Sprintf("%s/%s", source, att.Url)
199 data, err := ioutil.ReadFile(fname)
200 if err != nil {
201 elog.Printf("error reading media: %s", fname)
202 continue
203 }
204 u := xfiltrate()
205 name := att.Name
206 desc := name
207 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
208 fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
209 if err != nil {
210 elog.Printf("error saving media: %s", fname)
211 continue
212 }
213 donk := &Donk{
214 FileID: fileid,
215 }
216 honk.Donks = append(honk.Donks, donk)
217 }
218 }
219 for _, t := range toot.Object.Tag {
220 switch t.Type {
221 case "Hashtag":
222 honk.Onts = append(honk.Onts, t.Name)
223 }
224 }
225 savehonk(&honk)
226 }
227}
228
229func importMastotooters(user *WhatAbout, source string) {
230 ilog.Println("Importing honkers...")
231 fd, err := os.Open(source + "/following_accounts.csv")
232 if err != nil {
233 elog.Fatal(err)
234 }
235 r := csv.NewReader(fd)
236 data, err := r.ReadAll()
237 if err != nil {
238 elog.Fatal(err)
239 }
240 fd.Close()
241
242 var meta HonkerMeta
243 mj, _ := jsonify(&meta)
244
245 for i, d := range data {
246 if i == 0 {
247 continue
248 }
249 url := "@" + d[0]
250 name := ""
251 flavor := "peep"
252 combos := ""
253 _, err := savehonker(user, url, name, flavor, combos, mj)
254 if err != nil {
255 elog.Printf("trouble with a honker: %s", err)
256 }
257 }
258}
259
// importTwitter imports tweets from a twitter export archive unpacked at
// source into username's account.  Tweets are read from tweets.js, media
// files from source/tweets_media/, and replies within the archive are
// threaded together into a shared convoy.
func importTwitter(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	// Tweet mirrors one entry in the archive's tweets.js.  The lowercase
	// date and convoy fields are not part of the json; they are filled in
	// locally during the import.  Note the archive encodes most numbers
	// (ids, counts, sizes) as strings.
	type Tweet struct {
		date   time.Time
		convoy string
		Tweet  struct {
			CreatedAt        string   `json:"created_at"`
			DisplayTextRange []string `json:"display_text_range"`
			EditInfo         struct {
				Initial struct {
					EditTweetIds   []string `json:"editTweetIds"`
					EditableUntil  string   `json:"editableUntil"`
					EditsRemaining string   `json:"editsRemaining"`
					IsEditEligible bool     `json:"isEditEligible"`
				} `json:"initial"`
			} `json:"edit_info"`
			Entities struct {
				Hashtags []struct {
					Indices []string `json:"indices"`
					Text    string   `json:"text"`
				} `json:"hashtags"`
				Media []struct {
					DisplayURL    string   `json:"display_url"`
					ExpandedURL   string   `json:"expanded_url"`
					ID            string   `json:"id"`
					IdStr         string   `json:"id_str"`
					Indices       []string `json:"indices"`
					MediaURL      string   `json:"media_url"`
					MediaUrlHttps string   `json:"media_url_https"`
					Sizes         struct {
						Large struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"large"`
						Medium struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"medium"`
						Small struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL  string `json:"url"`
				} `json:"media"`
				Symbols []interface{} `json:"symbols"`
				Urls    []struct {
					DisplayURL  string   `json:"display_url"`
					ExpandedURL string   `json:"expanded_url"`
					Indices     []string `json:"indices"`
					URL         string   `json:"url"`
				} `json:"urls"`
				UserMentions []interface{} `json:"user_mentions"`
			} `json:"entities"`
			ExtendedEntities struct {
				Media []struct {
					DisplayURL    string   `json:"display_url"`
					ExpandedURL   string   `json:"expanded_url"`
					ID            string   `json:"id"`
					IdStr         string   `json:"id_str"`
					Indices       []string `json:"indices"`
					MediaURL      string   `json:"media_url"`
					MediaUrlHttps string   `json:"media_url_https"`
					Sizes         struct {
						Large struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"large"`
						Medium struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"medium"`
						Small struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL  string `json:"url"`
				} `json:"media"`
			} `json:"extended_entities"`
			FavoriteCount        string `json:"favorite_count"`
			Favorited            bool   `json:"favorited"`
			FullText             string `json:"full_text"`
			ID                   string `json:"id"`
			IdStr                string `json:"id_str"`
			InReplyToScreenName  string `json:"in_reply_to_screen_name"`
			InReplyToStatusID    string `json:"in_reply_to_status_id"`
			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
			InReplyToUserID      string `json:"in_reply_to_user_id"`
			InReplyToUserIdStr   string `json:"in_reply_to_user_id_str"`
			Lang                 string `json:"lang"`
			PossiblySensitive    bool   `json:"possibly_sensitive"`
			RetweetCount         string `json:"retweet_count"`
			Retweeted            bool   `json:"retweeted"`
			Source               string `json:"source"`
			Truncated            bool   `json:"truncated"`
		} `json:"tweet"`
	}

	var tweets []*Tweet
	fd, err := os.Open(source + "/tweets.js")
	if err != nil {
		elog.Fatal(err)
	}
	// tweets.js is jsonp: skip past the 25-byte "window.YTD.tweet.part0 = "
	// prefix so the rest decodes as a plain json array
	fd.Seek(25, 0)
	dec := json.NewDecoder(fd)
	err = dec.Decode(&tweets)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	// index by id string so replies can find their parent below;
	// parse failures leave date as the zero time (error ignored)
	tweetmap := make(map[string]*Tweet)
	for _, t := range tweets {
		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
		tweetmap[t.Tweet.IdStr] = t
	}
	// oldest first, so parents are processed before their replies
	sort.Slice(tweets, func(i, j int) bool {
		return tweets[i].date.Before(tweets[j].date)
	})
	// havetwid reports whether a honk with this xid was already imported
	havetwid := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		if err == nil {
			// NOTE(review): looks like leftover debug output — confirm
			log.Printf("id = %v", id)
			return true
		}
		return false
	}
	log.Printf("importing %v tweets", len(tweets))
	for _, t := range tweets {
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
		if havetwid(xid) {
			continue
		}

		what := "honk"
		noise := ""
		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
			// reply to a tweet in this archive: join its convoy
			t.convoy = parent.convoy
		} else {
			// start a new convoy; if it was a reply to someone outside
			// the archive, prepend a link to the original tweet
			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
			if t.Tweet.InReplyToScreenName != "" {
				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
			}
		}
		audience := []string{thewholeworld}
		honk := Honk{
			UserID:   user.ID,
			Username: user.Name,
			What:     what,
			Honker:   user.URL,
			XID:      xid,
			Date:     t.date,
			Format:   "markdown",
			Audience: audience,
			Convoy:   t.convoy,
			Public:   true,
			Whofore:  2,
		}
		noise += t.Tweet.FullText
		// unbelievable
		noise = html.UnescapeString(noise)
		// replace t.co shortlinks with their expanded targets
		for _, r := range t.Tweet.Entities.Urls {
			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
		}
		for _, m := range t.Tweet.Entities.Media {
			// archive media files are named <tweetid>-<url basename>
			u := m.MediaURL
			idx := strings.LastIndexByte(u, '/')
			u = u[idx+1:]
			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			// NOTE(review): mime type is hardcoded "image/jpg" even for
			// non-jpeg media — confirm whether that matters downstream
			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
			// drop the t.co media link from the text; the donk replaces it
			noise = strings.Replace(noise, m.URL, "", -1)
		}
		for _, ht := range t.Tweet.Entities.Hashtags {
			honk.Onts = append(honk.Onts, "#"+ht.Text)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}
481
482func importInstagram(username, source string) {
483 user, err := butwhatabout(username)
484 if err != nil {
485 elog.Fatal(err)
486 }
487
488 type Gram struct {
489 Media []struct {
490 URI string
491 Creation int64 `json:"creation_timestamp"`
492 Title string
493 }
494 }
495
496 var grams []*Gram
497 fd, err := os.Open(source + "/content/posts_1.json")
498 if err != nil {
499 elog.Fatal(err)
500 }
501 dec := json.NewDecoder(fd)
502 err = dec.Decode(&grams)
503 if err != nil {
504 elog.Fatalf("error parsing json: %s", err)
505 }
506 fd.Close()
507 log.Printf("importing %d grams", len(grams))
508 sort.Slice(grams, func(i, j int) bool {
509 return grams[i].Media[0].Creation < grams[j].Media[0].Creation
510 })
511 for _, g0 := range grams {
512 g := g0.Media[0]
513 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
514 what := "honk"
515 noise := g.Title
516 convoy := "data:,acoustichonkytonk-" + xfiltrate()
517 date := time.Unix(g.Creation, 0)
518 audience := []string{thewholeworld}
519 honk := Honk{
520 UserID: user.ID,
521 Username: user.Name,
522 What: what,
523 Honker: user.URL,
524 XID: xid,
525 Date: date,
526 Format: "markdown",
527 Audience: audience,
528 Convoy: convoy,
529 Public: true,
530 Whofore: 2,
531 }
532 {
533 u := xfiltrate()
534 fname := fmt.Sprintf("%s/%s", source, g.URI)
535 data, err := ioutil.ReadFile(fname)
536 if err != nil {
537 elog.Printf("error reading media: %s", fname)
538 continue
539 }
540 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
541
542 fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
543 if err != nil {
544 elog.Printf("error saving media: %s", fname)
545 continue
546 }
547 donk := &Donk{
548 FileID: fileid,
549 }
550 honk.Donks = append(honk.Donks, donk)
551 }
552 honk.Noise = noise
553 err := savehonk(&honk)
554 log.Printf("honk saved %v -> %v", xid, err)
555 }
556}
557
// export writes username's data to a new zip archive at file:
// outbox.json (their own public honks), inbox.json (honks addressed to
// them), and a media/ directory with every referenced attachment.
// The file must not already exist (O_EXCL).
func export(username, file string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}
	fd, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		elog.Fatal(err)
	}
	zd := zip.NewWriter(fd)
	// set of attachment XIDs referenced by either box, written out below
	donks := make(map[string]bool)
	{
		w, err := zd.Create("outbox.json")
		if err != nil {
			elog.Fatal(err)
		}
		var jonks []junk.Junk
		// NOTE(review): magic query args (0, 3, "0", 1234567) presumably
		// mean "all honks, no paging" — confirm against the statement
		rows, err := stmtUserHonks.Query(0, 3, user.Name, "0", 1234567)
		honks := getsomehonks(rows, err)
		for _, honk := range honks {
			for _, donk := range honk.Donks {
				// rewrite attachment urls to point inside the archive
				donk.URL = "media/" + donk.XID
				donks[donk.XID] = true
			}
			noise := honk.Noise
			j, jo := jonkjonk(user, honk)
			if honk.Format == "markdown" {
				// preserve the original markdown alongside the html
				jo["source"] = noise
			}
			jonks = append(jonks, j)
		}
		j := junk.New()
		j["@context"] = itiswhatitis
		j["id"] = user.URL + "/outbox"
		j["attributedTo"] = user.URL
		j["type"] = "OrderedCollection"
		j["totalItems"] = len(jonks)
		j["orderedItems"] = jonks
		j.Write(w)
	}
	{
		w, err := zd.Create("inbox.json")
		if err != nil {
			elog.Fatal(err)
		}
		var jonks []junk.Junk
		rows, err := stmtHonksForMe.Query(0, user.ID, "0", user.ID, 1234567)
		honks := getsomehonks(rows, err)
		for _, honk := range honks {
			for _, donk := range honk.Donks {
				donk.URL = "media/" + donk.XID
				donks[donk.XID] = true
			}
			j, _ := jonkjonk(user, honk)
			jonks = append(jonks, j)
		}
		j := junk.New()
		j["@context"] = itiswhatitis
		j["id"] = user.URL + "/inbox"
		j["attributedTo"] = user.URL
		j["type"] = "OrderedCollection"
		j["totalItems"] = len(jonks)
		j["orderedItems"] = jonks
		j.Write(w)
	}
	// directory entry, then one file per referenced attachment
	zd.Create("media/")
	for donk := range donks {
		var media string
		var data []byte
		w, err := zd.Create("media/" + donk)
		if err != nil {
			elog.Fatal(err)
		}
		row := stmtGetFileData.QueryRow(donk)
		err = row.Scan(&media, &data)
		if err != nil {
			elog.Fatal(err)
		}
		w.Write(data)
	}
	zd.Close()
	fd.Close()
}