// import.go
//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
package main

import (
	"encoding/csv"
	"encoding/json"
	"fmt"
	"html"
	"io/ioutil"
	"log"
	"os"
	"regexp"
	"sort"
	"strings"
	"time"
)
31
32func importMain(username, flavor, source string) {
33 switch flavor {
34 case "mastodon":
35 importMastodon(username, source)
36 case "twitter":
37 importTwitter(username, source)
38 case "instagram":
39 importInstagram(username, source)
40 default:
41 elog.Fatal("unknown source flavor")
42 }
43}
44
// TootObject is the subset of a mastodon activity object that the
// importer cares about.
type TootObject struct {
	Summary      string // content warning / spoiler text
	Content      string // html body of the toot
	InReplyTo    string
	Conversation string
	Published    time.Time
	Tag          []struct {
		Type string
		Name string
	}
	Attachment []struct {
		Type      string
		MediaType string
		Url       string
		Name      string
	}
}

// PlainTootObject has the same fields as TootObject but not its
// UnmarshalJSON method, so decoding into it cannot recurse.
type PlainTootObject TootObject

// UnmarshalJSON decodes a toot best-effort: whatever fields parse are
// filled in, and a decode error is logged rather than returned so that
// one malformed toot does not abort the entire import.
func (obj *TootObject) UnmarshalJSON(b []byte) error {
	p := (*PlainTootObject)(obj)
	// previously the error was silently discarded; log it so bad
	// input is at least visible, but keep the best-effort contract.
	if err := json.Unmarshal(b, p); err != nil {
		log.Printf("error decoding toot object: %s", err)
	}
	return nil
}
70
71func importMastodon(username, source string) {
72 user, err := butwhatabout(username)
73 if err != nil {
74 elog.Fatal(err)
75 }
76
77 if _, err := os.Stat(source + "/outbox.json"); err == nil {
78 importMastotoots(user, source)
79 } else {
80 ilog.Printf("skipping outbox.json!")
81 }
82 if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
83 importMastotooters(user, source)
84 } else {
85 ilog.Printf("skipping following_accounts.csv!")
86 }
87}
88
89func importMastotoots(user *WhatAbout, source string) {
90 type Toot struct {
91 Id string
92 Type string
93 To []string
94 Cc []string
95 Object TootObject
96 }
97 var outbox struct {
98 OrderedItems []Toot
99 }
100 ilog.Println("Importing honks...")
101 fd, err := os.Open(source + "/outbox.json")
102 if err != nil {
103 elog.Fatal(err)
104 }
105 dec := json.NewDecoder(fd)
106 err = dec.Decode(&outbox)
107 if err != nil {
108 elog.Fatalf("error parsing json: %s", err)
109 }
110 fd.Close()
111
112 havetoot := func(xid string) bool {
113 var id int64
114 row := stmtFindXonk.QueryRow(user.ID, xid)
115 err := row.Scan(&id)
116 if err == nil {
117 return true
118 }
119 return false
120 }
121
122 re_tootid := regexp.MustCompile("[^/]+$")
123 for _, item := range outbox.OrderedItems {
124 toot := item
125 if toot.Type != "Create" {
126 continue
127 }
128 if strings.HasSuffix(toot.Id, "/activity") {
129 toot.Id = strings.TrimSuffix(toot.Id, "/activity")
130 }
131 tootid := re_tootid.FindString(toot.Id)
132 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
133 if havetoot(xid) {
134 continue
135 }
136 honk := Honk{
137 UserID: user.ID,
138 What: "honk",
139 Honker: user.URL,
140 XID: xid,
141 RID: toot.Object.InReplyTo,
142 Date: toot.Object.Published,
143 URL: xid,
144 Audience: append(toot.To, toot.Cc...),
145 Noise: toot.Object.Content,
146 Convoy: toot.Object.Conversation,
147 Whofore: 2,
148 Format: "html",
149 Precis: toot.Object.Summary,
150 }
151 if !loudandproud(honk.Audience) {
152 honk.Whofore = 3
153 }
154 for _, att := range toot.Object.Attachment {
155 switch att.Type {
156 case "Document":
157 fname := fmt.Sprintf("%s/%s", source, att.Url)
158 data, err := ioutil.ReadFile(fname)
159 if err != nil {
160 elog.Printf("error reading media: %s", fname)
161 continue
162 }
163 u := xfiltrate()
164 name := att.Name
165 desc := name
166 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
167 fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
168 if err != nil {
169 elog.Printf("error saving media: %s", fname)
170 continue
171 }
172 donk := &Donk{
173 FileID: fileid,
174 }
175 honk.Donks = append(honk.Donks, donk)
176 }
177 }
178 for _, t := range toot.Object.Tag {
179 switch t.Type {
180 case "Hashtag":
181 honk.Onts = append(honk.Onts, t.Name)
182 }
183 }
184 savehonk(&honk)
185 }
186}
187
188func importMastotooters(user *WhatAbout, source string) {
189 ilog.Println("Importing honkers...")
190 fd, err := os.Open(source + "/following_accounts.csv")
191 if err != nil {
192 elog.Fatal(err)
193 }
194 r := csv.NewReader(fd)
195 data, err := r.ReadAll()
196 if err != nil {
197 elog.Fatal(err)
198 }
199 fd.Close()
200
201 var meta HonkerMeta
202 mj, _ := jsonify(&meta)
203
204 for i, d := range data {
205 if i == 0 {
206 continue
207 }
208 url := "@" + d[0]
209 name := ""
210 flavor := "peep"
211 combos := ""
212 _, err := savehonker(user, url, name, flavor, combos, mj)
213 if err != nil {
214 elog.Printf("trouble with a honker: %s", err)
215 }
216 }
217}
218
219func importTwitter(username, source string) {
220 user, err := butwhatabout(username)
221 if err != nil {
222 elog.Fatal(err)
223 }
224
225 type Tweet struct {
226 date time.Time
227 convoy string
228 Tweet struct {
229 CreatedAt string `json:"created_at"`
230 DisplayTextRange []string `json:"display_text_range"`
231 EditInfo struct {
232 Initial struct {
233 EditTweetIds []string `json:"editTweetIds"`
234 EditableUntil string `json:"editableUntil"`
235 EditsRemaining string `json:"editsRemaining"`
236 IsEditEligible bool `json:"isEditEligible"`
237 } `json:"initial"`
238 } `json:"edit_info"`
239 Entities struct {
240 Hashtags []struct {
241 Indices []string `json:"indices"`
242 Text string `json:"text"`
243 } `json:"hashtags"`
244 Media []struct {
245 DisplayURL string `json:"display_url"`
246 ExpandedURL string `json:"expanded_url"`
247 ID string `json:"id"`
248 IdStr string `json:"id_str"`
249 Indices []string `json:"indices"`
250 MediaURL string `json:"media_url"`
251 MediaUrlHttps string `json:"media_url_https"`
252 Sizes struct {
253 Large struct {
254 H string `json:"h"`
255 Resize string `json:"resize"`
256 W string `json:"w"`
257 } `json:"large"`
258 Medium struct {
259 H string `json:"h"`
260 Resize string `json:"resize"`
261 W string `json:"w"`
262 } `json:"medium"`
263 Small struct {
264 H string `json:"h"`
265 Resize string `json:"resize"`
266 W string `json:"w"`
267 } `json:"small"`
268 Thumb struct {
269 H string `json:"h"`
270 Resize string `json:"resize"`
271 W string `json:"w"`
272 } `json:"thumb"`
273 } `json:"sizes"`
274 Type string `json:"type"`
275 URL string `json:"url"`
276 } `json:"media"`
277 Symbols []interface{} `json:"symbols"`
278 Urls []struct {
279 DisplayURL string `json:"display_url"`
280 ExpandedURL string `json:"expanded_url"`
281 Indices []string `json:"indices"`
282 URL string `json:"url"`
283 } `json:"urls"`
284 UserMentions []interface{} `json:"user_mentions"`
285 } `json:"entities"`
286 ExtendedEntities struct {
287 Media []struct {
288 DisplayURL string `json:"display_url"`
289 ExpandedURL string `json:"expanded_url"`
290 ID string `json:"id"`
291 IdStr string `json:"id_str"`
292 Indices []string `json:"indices"`
293 MediaURL string `json:"media_url"`
294 MediaUrlHttps string `json:"media_url_https"`
295 Sizes struct {
296 Large struct {
297 H string `json:"h"`
298 Resize string `json:"resize"`
299 W string `json:"w"`
300 } `json:"large"`
301 Medium struct {
302 H string `json:"h"`
303 Resize string `json:"resize"`
304 W string `json:"w"`
305 } `json:"medium"`
306 Small struct {
307 H string `json:"h"`
308 Resize string `json:"resize"`
309 W string `json:"w"`
310 } `json:"small"`
311 Thumb struct {
312 H string `json:"h"`
313 Resize string `json:"resize"`
314 W string `json:"w"`
315 } `json:"thumb"`
316 } `json:"sizes"`
317 Type string `json:"type"`
318 URL string `json:"url"`
319 } `json:"media"`
320 } `json:"extended_entities"`
321 FavoriteCount string `json:"favorite_count"`
322 Favorited bool `json:"favorited"`
323 FullText string `json:"full_text"`
324 ID string `json:"id"`
325 IdStr string `json:"id_str"`
326 InReplyToScreenName string `json:"in_reply_to_screen_name"`
327 InReplyToStatusID string `json:"in_reply_to_status_id"`
328 InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
329 InReplyToUserID string `json:"in_reply_to_user_id"`
330 InReplyToUserIdStr string `json:"in_reply_to_user_id_str"`
331 Lang string `json:"lang"`
332 PossiblySensitive bool `json:"possibly_sensitive"`
333 RetweetCount string `json:"retweet_count"`
334 Retweeted bool `json:"retweeted"`
335 Source string `json:"source"`
336 Truncated bool `json:"truncated"`
337 } `json:"tweet"`
338 }
339
340 var tweets []*Tweet
341 fd, err := os.Open(source + "/tweets.js")
342 if err != nil {
343 elog.Fatal(err)
344 }
345 // skip past window.YTD.tweet.part0 =
346 fd.Seek(25, 0)
347 dec := json.NewDecoder(fd)
348 err = dec.Decode(&tweets)
349 if err != nil {
350 elog.Fatalf("error parsing json: %s", err)
351 }
352 fd.Close()
353 tweetmap := make(map[string]*Tweet)
354 for _, t := range tweets {
355 t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
356 tweetmap[t.Tweet.IdStr] = t
357 }
358 sort.Slice(tweets, func(i, j int) bool {
359 return tweets[i].date.Before(tweets[j].date)
360 })
361 havetwid := func(xid string) bool {
362 var id int64
363 row := stmtFindXonk.QueryRow(user.ID, xid)
364 err := row.Scan(&id)
365 if err == nil {
366 log.Printf("id = %v", id)
367 return true
368 }
369 return false
370 }
371 log.Printf("importing %v tweets", len(tweets))
372 for _, t := range tweets {
373 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
374 if havetwid(xid) {
375 continue
376 }
377
378 what := "honk"
379 noise := ""
380 if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
381 t.convoy = parent.convoy
382 } else {
383 t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
384 if t.Tweet.InReplyToScreenName != "" {
385 noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
386 t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
387 }
388 }
389 audience := []string{thewholeworld}
390 honk := Honk{
391 UserID: user.ID,
392 Username: user.Name,
393 What: what,
394 Honker: user.URL,
395 XID: xid,
396 Date: t.date,
397 Format: "markdown",
398 Audience: audience,
399 Convoy: t.convoy,
400 Public: true,
401 Whofore: 2,
402 }
403 noise += t.Tweet.FullText
404 // unbelievable
405 noise = html.UnescapeString(noise)
406 for _, r := range t.Tweet.Entities.Urls {
407 noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
408 }
409 for _, m := range t.Tweet.Entities.Media {
410 u := m.MediaURL
411 idx := strings.LastIndexByte(u, '/')
412 u = u[idx+1:]
413 fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
414 data, err := ioutil.ReadFile(fname)
415 if err != nil {
416 elog.Printf("error reading media: %s", fname)
417 continue
418 }
419 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
420
421 fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
422 if err != nil {
423 elog.Printf("error saving media: %s", fname)
424 continue
425 }
426 donk := &Donk{
427 FileID: fileid,
428 }
429 honk.Donks = append(honk.Donks, donk)
430 noise = strings.Replace(noise, m.URL, "", -1)
431 }
432 for _, ht := range t.Tweet.Entities.Hashtags {
433 honk.Onts = append(honk.Onts, "#"+ht.Text)
434 }
435 honk.Noise = noise
436 err := savehonk(&honk)
437 log.Printf("honk saved %v -> %v", xid, err)
438 }
439}
440
441func importInstagram(username, source string) {
442 user, err := butwhatabout(username)
443 if err != nil {
444 elog.Fatal(err)
445 }
446
447 type Gram struct {
448 Media []struct {
449 URI string
450 Creation int64 `json:"creation_timestamp"`
451 Title string
452 }
453 }
454
455 var grams []*Gram
456 fd, err := os.Open(source + "/content/posts_1.json")
457 if err != nil {
458 elog.Fatal(err)
459 }
460 dec := json.NewDecoder(fd)
461 err = dec.Decode(&grams)
462 if err != nil {
463 elog.Fatalf("error parsing json: %s", err)
464 }
465 fd.Close()
466 log.Printf("importing %d grams", len(grams))
467 sort.Slice(grams, func(i, j int) bool {
468 return grams[i].Media[0].Creation < grams[j].Media[0].Creation
469 })
470 for _, g0 := range grams {
471 g := g0.Media[0]
472 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
473 what := "honk"
474 noise := g.Title
475 convoy := "data:,acoustichonkytonk-" + xfiltrate()
476 date := time.Unix(g.Creation, 0)
477 audience := []string{thewholeworld}
478 honk := Honk{
479 UserID: user.ID,
480 Username: user.Name,
481 What: what,
482 Honker: user.URL,
483 XID: xid,
484 Date: date,
485 Format: "markdown",
486 Audience: audience,
487 Convoy: convoy,
488 Public: true,
489 Whofore: 2,
490 }
491 {
492 u := xfiltrate()
493 fname := fmt.Sprintf("%s/%s", source, g.URI)
494 data, err := ioutil.ReadFile(fname)
495 if err != nil {
496 elog.Printf("error reading media: %s", fname)
497 continue
498 }
499 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
500
501 fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
502 if err != nil {
503 elog.Printf("error saving media: %s", fname)
504 continue
505 }
506 donk := &Donk{
507 FileID: fileid,
508 }
509 honk.Donks = append(honk.Donks, donk)
510 }
511 honk.Noise = noise
512 err := savehonk(&honk)
513 log.Printf("honk saved %v -> %v", xid, err)
514 }
515}