import.go
//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

package main

import (
	"archive/zip"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"html"
	"io/ioutil"
	"log"
	"os"
	"regexp"
	"sort"
	"strings"
	"time"

	"humungus.tedunangst.com/r/webs/junk"
)

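// importMain dispatches an exported archive to the importer for the named
// service flavor.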
func importMain(username, flavor, source string) {
	switch flavor {
	case "mastodon":
		importMastodon(username, source)
	case "twitter":
		importTwitter(username, source)
	case "instagram":
		importInstagram(username, source)
	default:
		elog.Fatal("unknown source flavor")
	}
}

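// TootObject is the subset of a Mastodon post object's fields that the
// importer makes use of.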
type TootObject struct {
	Summary string
	Content string
	InReplyTo string
	Conversation string
	Published time.Time
	Tag []struct {
		Type string
		Name string
	}
	Attachment []struct {
		Type string
		MediaType string
		Url string
		Name string
	}
}

type PlainTootObject TootObject

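// UnmarshalJSON decodes via the PlainTootObject alias so that calling
// json.Unmarshal here doesn't recurse back into this method. Decode errors
// are ignored, presumably so that oddly shaped fields in an export leave
// their targets at zero values instead of aborting the import.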
func (obj *TootObject) UnmarshalJSON(b []byte) error {
	p := (*PlainTootObject)(obj)
	json.Unmarshal(b, p)
	return nil
}

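// importMastodon imports posts and followed accounts from an unpacked
// Mastodon export directory.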
func importMastodon(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	if _, err := os.Stat(source + "/outbox.json"); err == nil {
		importMastotoots(user, source)
	} else {
		ilog.Printf("skipping outbox.json!")
	}
	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
		importMastotooters(user, source)
	} else {
		ilog.Printf("skipping following_accounts.csv!")
	}
}

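// importMastotoots reads outbox.json and saves each Create activity as a honk,
// along with any attached media found in the export.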
func importMastotoots(user *WhatAbout, source string) {
	type Toot struct {
		Id string
		Type string
		To []string
		Cc []string
		Object TootObject
	}
	var outbox struct {
		OrderedItems []Toot
	}
	ilog.Println("Importing honks...")
	fd, err := os.Open(source + "/outbox.json")
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&outbox)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()

	// havetoot reports whether a honk with this xid was already imported
	havetoot := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		return err == nil
	}

	re_tootid := regexp.MustCompile("[^/]+$")
	for _, item := range outbox.OrderedItems {
		toot := item
		if toot.Type != "Create" {
			continue
		}
		// the trailing path element of the activity id becomes the honk id
		if strings.HasSuffix(toot.Id, "/activity") {
			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
		}
		tootid := re_tootid.FindString(toot.Id)
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
		if havetoot(xid) {
			continue
		}
		honk := Honk{
			UserID: user.ID,
			What: "honk",
			Honker: user.URL,
			XID: xid,
			RID: toot.Object.InReplyTo,
			Date: toot.Object.Published,
			URL: xid,
			Audience: append(toot.To, toot.Cc...),
			Noise: toot.Object.Content,
			Convoy: toot.Object.Conversation,
			Whofore: 2,
			Format: "html",
			Precis: toot.Object.Summary,
		}
		if !loudandproud(honk.Audience) {
			honk.Whofore = 3
		}
		for _, att := range toot.Object.Attachment {
			switch att.Type {
			case "Document":
				fname := fmt.Sprintf("%s/%s", source, att.Url)
				data, err := ioutil.ReadFile(fname)
				if err != nil {
					elog.Printf("error reading media: %s", fname)
					continue
				}
				u := xfiltrate()
				name := att.Name
				desc := name
				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
				if err != nil {
					elog.Printf("error saving media: %s", fname)
					continue
				}
				donk := &Donk{
					FileID: fileid,
				}
				honk.Donks = append(honk.Donks, donk)
			}
		}
		for _, t := range toot.Object.Tag {
			switch t.Type {
			case "Hashtag":
				honk.Onts = append(honk.Onts, t.Name)
			}
		}
		savehonk(&honk)
	}
}

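// importMastotooters reads following_accounts.csv and subscribes to each
// listed account.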
func importMastotooters(user *WhatAbout, source string) {
	ilog.Println("Importing honkers...")
	fd, err := os.Open(source + "/following_accounts.csv")
	if err != nil {
		elog.Fatal(err)
	}
	r := csv.NewReader(fd)
	data, err := r.ReadAll()
	if err != nil {
		elog.Fatal(err)
	}
	fd.Close()

	var meta HonkerMeta
	mj, _ := jsonify(&meta)

	for i, d := range data {
		// skip the csv header row
		if i == 0 {
			continue
		}
		url := "@" + d[0]
		name := ""
		flavor := "peep"
		combos := ""
		_, err := savehonker(user, url, name, flavor, combos, mj)
		if err != nil {
			elog.Printf("trouble with a honker: %s", err)
		}
	}
}

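// importTwitter imports tweets from an unpacked Twitter data archive,
// threading replies into conversations when the parent tweet is also present
// in the archive.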
func importTwitter(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Tweet struct {
		date time.Time
		convoy string
		Tweet struct {
			CreatedAt string `json:"created_at"`
			DisplayTextRange []string `json:"display_text_range"`
			EditInfo struct {
				Initial struct {
					EditTweetIds []string `json:"editTweetIds"`
					EditableUntil string `json:"editableUntil"`
					EditsRemaining string `json:"editsRemaining"`
					IsEditEligible bool `json:"isEditEligible"`
				} `json:"initial"`
			} `json:"edit_info"`
			Entities struct {
				Hashtags []struct {
					Indices []string `json:"indices"`
					Text string `json:"text"`
				} `json:"hashtags"`
				Media []struct {
					DisplayURL string `json:"display_url"`
					ExpandedURL string `json:"expanded_url"`
					ID string `json:"id"`
					IdStr string `json:"id_str"`
					Indices []string `json:"indices"`
					MediaURL string `json:"media_url"`
					MediaUrlHttps string `json:"media_url_https"`
					Sizes struct {
						Large struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"large"`
						Medium struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"medium"`
						Small struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL string `json:"url"`
				} `json:"media"`
				Symbols []interface{} `json:"symbols"`
				Urls []struct {
					DisplayURL string `json:"display_url"`
					ExpandedURL string `json:"expanded_url"`
					Indices []string `json:"indices"`
					URL string `json:"url"`
				} `json:"urls"`
				UserMentions []interface{} `json:"user_mentions"`
			} `json:"entities"`
			ExtendedEntities struct {
				Media []struct {
					DisplayURL string `json:"display_url"`
					ExpandedURL string `json:"expanded_url"`
					ID string `json:"id"`
					IdStr string `json:"id_str"`
					Indices []string `json:"indices"`
					MediaURL string `json:"media_url"`
					MediaUrlHttps string `json:"media_url_https"`
					Sizes struct {
						Large struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"large"`
						Medium struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"medium"`
						Small struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL string `json:"url"`
				} `json:"media"`
			} `json:"extended_entities"`
			FavoriteCount string `json:"favorite_count"`
			Favorited bool `json:"favorited"`
			FullText string `json:"full_text"`
			ID string `json:"id"`
			IdStr string `json:"id_str"`
			InReplyToScreenName string `json:"in_reply_to_screen_name"`
			InReplyToStatusID string `json:"in_reply_to_status_id"`
			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
			InReplyToUserID string `json:"in_reply_to_user_id"`
			InReplyToUserIdStr string `json:"in_reply_to_user_id_str"`
			Lang string `json:"lang"`
			PossiblySensitive bool `json:"possibly_sensitive"`
			RetweetCount string `json:"retweet_count"`
			Retweeted bool `json:"retweeted"`
			Source string `json:"source"`
			Truncated bool `json:"truncated"`
		} `json:"tweet"`
	}

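	// The archive stores tweets in tweets.js, which is a JavaScript assignment
	// rather than plain JSON, so the decoder has to skip the variable prefix.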
	var tweets []*Tweet
	fd, err := os.Open(source + "/tweets.js")
	if err != nil {
		elog.Fatal(err)
	}
	// skip past window.YTD.tweet.part0 =
	fd.Seek(25, 0)
	dec := json.NewDecoder(fd)
	err = dec.Decode(&tweets)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	// index tweets by id so replies can be joined to their parents,
	// then import oldest first
	tweetmap := make(map[string]*Tweet)
	for _, t := range tweets {
		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
		tweetmap[t.Tweet.IdStr] = t
	}
	sort.Slice(tweets, func(i, j int) bool {
		return tweets[i].date.Before(tweets[j].date)
	})
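	// havetwid reports whether a tweet with this xid has already been imported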
	havetwid := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		if err == nil {
			log.Printf("id = %v", id)
			return true
		}
		return false
	}
	log.Printf("importing %v tweets", len(tweets))
	for _, t := range tweets {
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
		if havetwid(xid) {
			continue
		}

		what := "honk"
		noise := ""
		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
			t.convoy = parent.convoy
		} else {
			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
			if t.Tweet.InReplyToScreenName != "" {
				// the parent tweet isn't in the archive, so just link to it
				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
			}
		}
		audience := []string{thewholeworld}
		honk := Honk{
			UserID: user.ID,
			Username: user.Name,
			What: what,
			Honker: user.URL,
			XID: xid,
			Date: t.date,
			Format: "markdown",
			Audience: audience,
			Convoy: t.convoy,
			Public: true,
			Whofore: 2,
		}
		noise += t.Tweet.FullText
		// unbelievable
		noise = html.UnescapeString(noise)
		for _, r := range t.Tweet.Entities.Urls {
			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
		}
		for _, m := range t.Tweet.Entities.Media {
			u := m.MediaURL
			idx := strings.LastIndexByte(u, '/')
			u = u[idx+1:]
			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			// the archive doesn't record a mime type per file, so assume jpeg
			fileid, err := savefile(u, u, newurl, "image/jpeg", true, data)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
			// drop the t.co link from the text now that the image is attached
			noise = strings.Replace(noise, m.URL, "", -1)
		}
		for _, ht := range t.Tweet.Entities.Hashtags {
			honk.Onts = append(honk.Onts, "#"+ht.Text)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}

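// importInstagram imports posts from an unpacked Instagram export. Only the
// first media item of each post is used.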
func importInstagram(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Gram struct {
		Media []struct {
			URI string
			Creation int64 `json:"creation_timestamp"`
			Title string
		}
	}

	var grams []*Gram
	fd, err := os.Open(source + "/content/posts_1.json")
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&grams)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	log.Printf("importing %d grams", len(grams))
	sort.Slice(grams, func(i, j int) bool {
		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
	})
	for _, g0 := range grams {
		// only the first media item of a multi-photo post is imported
		g := g0.Media[0]
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
		what := "honk"
		noise := g.Title
		convoy := "data:,acoustichonkytonk-" + xfiltrate()
		date := time.Unix(g.Creation, 0)
		audience := []string{thewholeworld}
		honk := Honk{
			UserID: user.ID,
			Username: user.Name,
			What: what,
			Honker: user.URL,
			XID: xid,
			Date: date,
			Format: "markdown",
			Audience: audience,
			Convoy: convoy,
			Public: true,
			Whofore: 2,
		}
		{
			u := xfiltrate()
			fname := fmt.Sprintf("%s/%s", source, g.URI)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			// the export doesn't record a mime type per file, so assume jpeg
			fileid, err := savefile(u, u, newurl, "image/jpeg", true, data)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}

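// export writes the named user's honks and their attached media into a zip
// archive at the given path, with the posts collected in an ActivityPub style
// outbox.json.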
func export(username, file string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}
	fd, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		elog.Fatal(err)
	}
	zd := zip.NewWriter(fd)
	// track which attachments the exported honks reference
	donks := make(map[string]bool)
	{
		w, err := zd.Create("outbox.json")
		if err != nil {
			elog.Fatal(err)
		}
		var jonks []junk.Junk
		rows, err := stmtUserHonks.Query(0, 3, user.Name, "0", 1234567)
		honks := getsomehonks(rows, err)
		for _, honk := range honks {
			noise := honk.Noise
			j, jo := jonkjonk(user, honk)
			if honk.Format == "markdown" {
				jo["source"] = noise
			}
			for _, donk := range honk.Donks {
				donks[donk.XID] = true
			}
			jonks = append(jonks, j)
		}
		j := junk.New()
		j["@context"] = itiswhatitis
		j["id"] = user.URL + "/outbox"
		j["attributedTo"] = user.URL
		j["type"] = "OrderedCollection"
		j["totalItems"] = len(jonks)
		j["orderedItems"] = jonks
		j.Write(w)
	}
	zd.Create("media/")
	for donk := range donks {
		var media string
		var data []byte
		w, err := zd.Create("media/" + donk)
		if err != nil {
			elog.Fatal(err)
		}
		row := stmtGetFileData.QueryRow(donk)
		err = row.Scan(&media, &data)
		if err != nil {
			elog.Fatal(err)
		}
		w.Write(data)
	}
	zd.Close()
	fd.Close()
}