import.go
//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

package main

import (
	"encoding/csv"
	"encoding/json"
	"fmt"
	"html"
	"io/ioutil"
	"log"
	"os"
	"regexp"
	"sort"
	"strings"
	"time"
)

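// importMain dispatches an archive import to the matching importer.
// flavor selects the archive type and source is the path to the
// unpacked export directory.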
func importMain(username, flavor, source string) {
	switch flavor {
	case "mastodon":
		importMastodon(username, source)
	case "twitter":
		importTwitter(username, source)
	case "instagram":
		importInstagram(username, source)
	default:
		elog.Fatal("unknown source flavor")
	}
}

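// TootObject holds the fields the importer uses from each toot's object
// in a Mastodon outbox.json export.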
type TootObject struct {
	Summary string
	Content string
	InReplyTo string
	Conversation string
	Published time.Time
	Tag []struct {
		Type string
		Name string
	}
	Attachment []struct {
		Type string
		MediaType string
		Url string
		Name string
	}
}

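// PlainTootObject exists so the custom UnmarshalJSON below can decode
// into the same fields without calling itself recursively.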
type PlainTootObject TootObject

func (obj *TootObject) UnmarshalJSON(b []byte) error {
	p := (*PlainTootObject)(obj)
	// decode errors are ignored; a malformed object ends up with empty
	// fields instead of aborting the whole import
	json.Unmarshal(b, p)
	return nil
}

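// importMastodon imports honks and honkers from an unpacked Mastodon
// account export containing outbox.json and following_accounts.csv.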
func importMastodon(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	if _, err := os.Stat(source + "/outbox.json"); err == nil {
		importMastotoots(user, source)
	} else {
		ilog.Printf("skipping outbox.json!")
	}
	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
		importMastotooters(user, source)
	} else {
		ilog.Printf("skipping following_accounts.csv!")
	}
}

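// importMastotoots converts the Create activities in outbox.json into
// honks, copying any attached media into the local attachment store.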
func importMastotoots(user *WhatAbout, source string) {
	type Toot struct {
		Id string
		Type string
		To []string
		Cc []string
		Object TootObject
	}
	var outbox struct {
		OrderedItems []Toot
	}
	ilog.Println("Importing honks...")
	fd, err := os.Open(source + "/outbox.json")
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&outbox)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()

	// skip toots that were already imported on a previous run
	havetoot := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		return err == nil
	}

	// the last path element of the toot id becomes the local honk id
	re_tootid := regexp.MustCompile("[^/]+$")
	for _, item := range outbox.OrderedItems {
		toot := item
		if toot.Type != "Create" {
			continue
		}
		if strings.HasSuffix(toot.Id, "/activity") {
			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
		}
		tootid := re_tootid.FindString(toot.Id)
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
		if havetoot(xid) {
			continue
		}
		honk := Honk{
			UserID: user.ID,
			What: "honk",
			Honker: user.URL,
			XID: xid,
			RID: toot.Object.InReplyTo,
			Date: toot.Object.Published,
			URL: xid,
			Audience: append(toot.To, toot.Cc...),
			Noise: toot.Object.Content,
			Convoy: toot.Object.Conversation,
			Whofore: 2,
			Format: "html",
			Precis: toot.Object.Summary,
		}
		if honk.RID != "" {
			honk.What = "tonk"
		}
		if !loudandproud(honk.Audience) {
			honk.Whofore = 3
		}
		for _, att := range toot.Object.Attachment {
			switch att.Type {
			case "Document":
				fname := fmt.Sprintf("%s/%s", source, att.Url)
				data, err := ioutil.ReadFile(fname)
				if err != nil {
					elog.Printf("error reading media: %s", fname)
					continue
				}
				u := xfiltrate()
				name := att.Name
				desc := name
				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
				if err != nil {
					elog.Printf("error saving media: %s", fname)
					continue
				}
				donk := &Donk{
					FileID: fileid,
				}
				honk.Donks = append(honk.Donks, donk)
			}
		}
		for _, t := range toot.Object.Tag {
			switch t.Type {
			case "Hashtag":
				honk.Onts = append(honk.Onts, t.Name)
			}
		}
		savehonk(&honk)
	}
}

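// importMastotooters adds each account listed in following_accounts.csv
// as a honker.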
func importMastotooters(user *WhatAbout, source string) {
	ilog.Println("Importing honkers...")
	fd, err := os.Open(source + "/following_accounts.csv")
	if err != nil {
		elog.Fatal(err)
	}
	r := csv.NewReader(fd)
	data, err := r.ReadAll()
	if err != nil {
		elog.Fatal(err)
	}
	fd.Close()

	var meta HonkerMeta
	mj, _ := jsonify(&meta)

	for i, d := range data {
		// the first row of the CSV is the column header
		if i == 0 {
			continue
		}
		url := "@" + d[0]
		name := ""
		flavor := "peep"
		combos := ""
		_, err := savehonker(user, url, name, flavor, combos, mj)
		if err != nil {
			elog.Printf("trouble with a honker: %s", err)
		}
	}
}

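// importTwitter imports tweets from an unpacked Twitter archive,
// reading tweet.js and the files under tweets_media.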
func importTwitter(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Tweet struct {
		date time.Time
		convoy string
		Tweet struct {
			CreatedAt string `json:"created_at"`
			DisplayTextRange []string `json:"display_text_range"`
			EditInfo struct {
				Initial struct {
					EditTweetIds []string `json:"editTweetIds"`
					EditableUntil string `json:"editableUntil"`
					EditsRemaining string `json:"editsRemaining"`
					IsEditEligible bool `json:"isEditEligible"`
				} `json:"initial"`
			} `json:"edit_info"`
			Entities struct {
				Hashtags []struct {
					Indices []string `json:"indices"`
					Text string `json:"text"`
				} `json:"hashtags"`
				Media []struct {
					DisplayURL string `json:"display_url"`
					ExpandedURL string `json:"expanded_url"`
					ID string `json:"id"`
					IdStr string `json:"id_str"`
					Indices []string `json:"indices"`
					MediaURL string `json:"media_url"`
					MediaUrlHttps string `json:"media_url_https"`
					Sizes struct {
						Large struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"large"`
						Medium struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"medium"`
						Small struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL string `json:"url"`
				} `json:"media"`
				Symbols []interface{} `json:"symbols"`
				Urls []struct {
					DisplayURL string `json:"display_url"`
					ExpandedURL string `json:"expanded_url"`
					Indices []string `json:"indices"`
					URL string `json:"url"`
				} `json:"urls"`
				UserMentions []interface{} `json:"user_mentions"`
			} `json:"entities"`
			ExtendedEntities struct {
				Media []struct {
					DisplayURL string `json:"display_url"`
					ExpandedURL string `json:"expanded_url"`
					ID string `json:"id"`
					IdStr string `json:"id_str"`
					Indices []string `json:"indices"`
					MediaURL string `json:"media_url"`
					MediaUrlHttps string `json:"media_url_https"`
					Sizes struct {
						Large struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"large"`
						Medium struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"medium"`
						Small struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H string `json:"h"`
							Resize string `json:"resize"`
							W string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL string `json:"url"`
				} `json:"media"`
			} `json:"extended_entities"`
			FavoriteCount string `json:"favorite_count"`
			Favorited bool `json:"favorited"`
			FullText string `json:"full_text"`
			ID string `json:"id"`
			IdStr string `json:"id_str"`
			InReplyToScreenName string `json:"in_reply_to_screen_name"`
			InReplyToStatusID string `json:"in_reply_to_status_id"`
			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
			InReplyToUserID string `json:"in_reply_to_user_id"`
			InReplyToUserIdStr string `json:"in_reply_to_user_id_str"`
			Lang string `json:"lang"`
			PossiblySensitive bool `json:"possibly_sensitive"`
			RetweetCount string `json:"retweet_count"`
			Retweeted bool `json:"retweeted"`
			Source string `json:"source"`
			Truncated bool `json:"truncated"`
		} `json:"tweet"`
	}

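	// load every tweet from the archive, index it by id, and sort oldest first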
	var tweets []*Tweet
	fd, err := os.Open(source + "/tweet.js")
	if err != nil {
		elog.Fatal(err)
	}
	// tweet.js is not quite JSON: skip past the 25 byte
	// "window.YTD.tweet.part0 = " prefix so the rest decodes cleanly
	fd.Seek(25, 0)
	dec := json.NewDecoder(fd)
	err = dec.Decode(&tweets)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	tweetmap := make(map[string]*Tweet)
	for _, t := range tweets {
		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
		tweetmap[t.Tweet.IdStr] = t
	}
	sort.Slice(tweets, func(i, j int) bool {
		return tweets[i].date.Before(tweets[j].date)
	})
	havetwid := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		if err == nil {
			log.Printf("id = %v", id)
			return true
		}
		return false
	}
	log.Printf("importing %v tweets", len(tweets))
	for _, t := range tweets {
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
		if havetwid(xid) {
			continue
		}

		// skip tweets with no recorded likes
		if t.Tweet.FavoriteCount == "0" || t.Tweet.FavoriteCount == "" {
			log.Printf("skipping, unworthy tweet")
			continue
		}

		what := "honk"
		noise := ""
		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
			t.convoy = parent.convoy
			what = "tonk"
		} else {
			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
			if t.Tweet.InReplyToScreenName != "" {
				// reply to a tweet that is not in the archive; link back to it
				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
				what = "tonk"
			}
		}
		audience := []string{thewholeworld}
		honk := Honk{
			UserID: user.ID,
			Username: user.Name,
			What: what,
			Honker: user.URL,
			XID: xid,
			Date: t.date,
			Format: "markdown",
			Audience: audience,
			Convoy: t.convoy,
			Public: true,
			Whofore: 2,
		}
		noise += t.Tweet.FullText
		// unbelievable: the archive stores full_text already HTML escaped
		noise = html.UnescapeString(noise)
		// expand shortened t.co links back into their original urls
		for _, r := range t.Tweet.Entities.Urls {
			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
		}
		for _, m := range t.Tweet.Entities.Media {
			u := m.MediaURL
			idx := strings.LastIndexByte(u, '/')
			u = u[idx+1:]
			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
			// drop the media link from the text now that the file is attached
			noise = strings.Replace(noise, m.URL, "", -1)
		}
		for _, ht := range t.Tweet.Entities.Hashtags {
			honk.Onts = append(honk.Onts, "#"+ht.Text)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}

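// importInstagram imports posts from an unpacked Instagram export,
// reading content/posts_1.json and attaching the referenced media files.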
func importInstagram(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Gram struct {
		Media []struct {
			URI string
			Creation int64 `json:"creation_timestamp"`
			Title string
		}
	}

	var grams []*Gram
	fd, err := os.Open(source + "/content/posts_1.json")
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&grams)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	log.Printf("importing %d grams", len(grams))
	sort.Slice(grams, func(i, j int) bool {
		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
	})
	for _, g0 := range grams {
		// only the first media item of each post is imported
		g := g0.Media[0]
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
		what := "honk"
		noise := g.Title
		convoy := "data:,acoustichonkytonk-" + xfiltrate()
		date := time.Unix(g.Creation, 0)
		audience := []string{thewholeworld}
		honk := Honk{
			UserID: user.ID,
			Username: user.Name,
			What: what,
			Honker: user.URL,
			XID: xid,
			Date: date,
			Format: "markdown",
			Audience: audience,
			Convoy: convoy,
			Public: true,
			Whofore: 2,
		}
		{
			// copy the media file into the local attachment store
			u := xfiltrate()
			fname := fmt.Sprintf("%s/%s", source, g.URI)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}