import.go (view raw)
1//
2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
3//
4// Permission to use, copy, modify, and distribute this software for any
5// purpose with or without fee is hereby granted, provided that the above
6// copyright notice and this permission notice appear in all copies.
7//
8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
16package main
17
18import (
19 "encoding/csv"
20 "encoding/json"
21 "fmt"
22 "html"
23 "io/ioutil"
24 "log"
25 "os"
26 "regexp"
27 "sort"
28 "strings"
29 "time"
30)
31
32func importMain(username, flavor, source string) {
33 switch flavor {
34 case "mastodon":
35 importMastodon(username, source)
36 case "twitter":
37 importTwitter(username, source)
38 default:
39 elog.Fatal("unknown source flavor")
40 }
41}
42
// TootObject holds the parts of an activity's "object" that the Mastodon
// importer reads out of outbox.json.
type TootObject struct {
	Summary      string    // copied into the honk's Precis
	Content      string    // copied into the honk's Noise
	InReplyTo    string    // copied into the honk's RID
	Conversation string    // copied into the honk's Convoy
	Published    time.Time // copied into the honk's Date

	// Tag lists the object's tags; the importer keeps those whose Type is
	// "Hashtag".
	Tag []struct {
		Type string
		Name string
	}

	// Attachment lists the object's attachments; the importer handles those
	// whose Type is "Document".
	Attachment []struct {
		Type      string
		MediaType string
		Url       string
		Name      string
	}
}

// PlainTootObject has the same fields as TootObject but none of its methods,
// so decoding into it uses the default JSON rules instead of calling
// TootObject.UnmarshalJSON again.
type PlainTootObject TootObject

// UnmarshalJSON decodes b into obj on a best-effort basis: any decoding error
// is discarded and nil is always returned.
//
// NOTE(review): presumably this exists so that an outbox entry whose "object"
// is not a JSON object (for example a bare string) does not abort decoding of
// the whole outbox - confirm before tightening.
func (obj *TootObject) UnmarshalJSON(b []byte) error {
	plain := (*PlainTootObject)(obj)
	_ = json.Unmarshal(b, plain) // error intentionally ignored, see above
	return nil
}
68
69func importMastodon(username, source string) {
70 user, err := butwhatabout(username)
71 if err != nil {
72 elog.Fatal(err)
73 }
74
75 if _, err := os.Stat(source + "/outbox.json"); err == nil {
76 importMastotoots(user, source)
77 } else {
78 ilog.Printf("skipping outbox.json!")
79 }
80 if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
81 importMastotooters(user, source)
82 } else {
83 ilog.Printf("skipping following_accounts.csv!")
84 }
85}
86
87func importMastotoots(user *WhatAbout, source string) {
88 type Toot struct {
89 Id string
90 Type string
91 To []string
92 Cc []string
93 Object TootObject
94 }
95 var outbox struct {
96 OrderedItems []Toot
97 }
98 ilog.Println("Importing honks...")
99 fd, err := os.Open(source + "/outbox.json")
100 if err != nil {
101 elog.Fatal(err)
102 }
103 dec := json.NewDecoder(fd)
104 err = dec.Decode(&outbox)
105 if err != nil {
106 elog.Fatalf("error parsing json: %s", err)
107 }
108 fd.Close()
109
110 havetoot := func(xid string) bool {
111 var id int64
112 row := stmtFindXonk.QueryRow(user.ID, xid)
113 err := row.Scan(&id)
114 if err == nil {
115 return true
116 }
117 return false
118 }
119
120 re_tootid := regexp.MustCompile("[^/]+$")
121 for _, item := range outbox.OrderedItems {
122 toot := item
123 if toot.Type != "Create" {
124 continue
125 }
126 if strings.HasSuffix(toot.Id, "/activity") {
127 toot.Id = strings.TrimSuffix(toot.Id, "/activity")
128 }
129 tootid := re_tootid.FindString(toot.Id)
130 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
131 if havetoot(xid) {
132 continue
133 }
134 honk := Honk{
135 UserID: user.ID,
136 What: "honk",
137 Honker: user.URL,
138 XID: xid,
139 RID: toot.Object.InReplyTo,
140 Date: toot.Object.Published,
141 URL: xid,
142 Audience: append(toot.To, toot.Cc...),
143 Noise: toot.Object.Content,
144 Convoy: toot.Object.Conversation,
145 Whofore: 2,
146 Format: "html",
147 Precis: toot.Object.Summary,
148 }
149 if honk.RID != "" {
150 honk.What = "tonk"
151 }
152 if !loudandproud(honk.Audience) {
153 honk.Whofore = 3
154 }
155 for _, att := range toot.Object.Attachment {
156 switch att.Type {
157 case "Document":
158 fname := fmt.Sprintf("%s/%s", source, att.Url)
159 data, err := ioutil.ReadFile(fname)
160 if err != nil {
161 elog.Printf("error reading media: %s", fname)
162 continue
163 }
164 u := xfiltrate()
165 name := att.Name
166 desc := name
167 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
168 fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
169 if err != nil {
170 elog.Printf("error saving media: %s", fname)
171 continue
172 }
173 donk := &Donk{
174 FileID: fileid,
175 }
176 honk.Donks = append(honk.Donks, donk)
177 }
178 }
179 for _, t := range toot.Object.Tag {
180 switch t.Type {
181 case "Hashtag":
182 honk.Onts = append(honk.Onts, t.Name)
183 }
184 }
185 savehonk(&honk)
186 }
187}
188
189func importMastotooters(user *WhatAbout, source string) {
190 ilog.Println("Importing honkers...")
191 fd, err := os.Open(source + "/following_accounts.csv")
192 if err != nil {
193 elog.Fatal(err)
194 }
195 r := csv.NewReader(fd)
196 data, err := r.ReadAll()
197 if err != nil {
198 elog.Fatal(err)
199 }
200 fd.Close()
201
202 var meta HonkerMeta
203 mj, _ := jsonify(&meta)
204
205 for i, d := range data {
206 if i == 0 {
207 continue
208 }
209 url := "@" + d[0]
210 name := ""
211 flavor := "peep"
212 combos := ""
213 _, err := savehonker(user, url, name, flavor, combos, mj)
214 if err != nil {
215 elog.Printf("trouble with a honker: %s", err)
216 }
217 }
218}
219
220func importTwitter(username, source string) {
221 user, err := butwhatabout(username)
222 if err != nil {
223 elog.Fatal(err)
224 }
225
226 type Tweet struct {
227 date time.Time
228 convoy string
229 Tweet struct {
230 CreatedAt string `json:"created_at"`
231 DisplayTextRange []string `json:"display_text_range"`
232 EditInfo struct {
233 Initial struct {
234 EditTweetIds []string `json:"editTweetIds"`
235 EditableUntil string `json:"editableUntil"`
236 EditsRemaining string `json:"editsRemaining"`
237 IsEditEligible bool `json:"isEditEligible"`
238 } `json:"initial"`
239 } `json:"edit_info"`
240 Entities struct {
241 Hashtags []struct {
242 Indices []string `json:"indices"`
243 Text string `json:"text"`
244 } `json:"hashtags"`
245 Media []struct {
246 DisplayURL string `json:"display_url"`
247 ExpandedURL string `json:"expanded_url"`
248 ID string `json:"id"`
249 IdStr string `json:"id_str"`
250 Indices []string `json:"indices"`
251 MediaURL string `json:"media_url"`
252 MediaUrlHttps string `json:"media_url_https"`
253 Sizes struct {
254 Large struct {
255 H string `json:"h"`
256 Resize string `json:"resize"`
257 W string `json:"w"`
258 } `json:"large"`
259 Medium struct {
260 H string `json:"h"`
261 Resize string `json:"resize"`
262 W string `json:"w"`
263 } `json:"medium"`
264 Small struct {
265 H string `json:"h"`
266 Resize string `json:"resize"`
267 W string `json:"w"`
268 } `json:"small"`
269 Thumb struct {
270 H string `json:"h"`
271 Resize string `json:"resize"`
272 W string `json:"w"`
273 } `json:"thumb"`
274 } `json:"sizes"`
275 Type string `json:"type"`
276 URL string `json:"url"`
277 } `json:"media"`
278 Symbols []interface{} `json:"symbols"`
279 Urls []struct {
280 DisplayURL string `json:"display_url"`
281 ExpandedURL string `json:"expanded_url"`
282 Indices []string `json:"indices"`
283 URL string `json:"url"`
284 } `json:"urls"`
285 UserMentions []interface{} `json:"user_mentions"`
286 } `json:"entities"`
287 ExtendedEntities struct {
288 Media []struct {
289 DisplayURL string `json:"display_url"`
290 ExpandedURL string `json:"expanded_url"`
291 ID string `json:"id"`
292 IdStr string `json:"id_str"`
293 Indices []string `json:"indices"`
294 MediaURL string `json:"media_url"`
295 MediaUrlHttps string `json:"media_url_https"`
296 Sizes struct {
297 Large struct {
298 H string `json:"h"`
299 Resize string `json:"resize"`
300 W string `json:"w"`
301 } `json:"large"`
302 Medium struct {
303 H string `json:"h"`
304 Resize string `json:"resize"`
305 W string `json:"w"`
306 } `json:"medium"`
307 Small struct {
308 H string `json:"h"`
309 Resize string `json:"resize"`
310 W string `json:"w"`
311 } `json:"small"`
312 Thumb struct {
313 H string `json:"h"`
314 Resize string `json:"resize"`
315 W string `json:"w"`
316 } `json:"thumb"`
317 } `json:"sizes"`
318 Type string `json:"type"`
319 URL string `json:"url"`
320 } `json:"media"`
321 } `json:"extended_entities"`
322 FavoriteCount string `json:"favorite_count"`
323 Favorited bool `json:"favorited"`
324 FullText string `json:"full_text"`
325 ID string `json:"id"`
326 IdStr string `json:"id_str"`
327 InReplyToScreenName string `json:"in_reply_to_screen_name"`
328 InReplyToStatusID string `json:"in_reply_to_status_id"`
329 InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
330 InReplyToUserID string `json:"in_reply_to_user_id"`
331 InReplyToUserIdStr string `json:"in_reply_to_user_id_str"`
332 Lang string `json:"lang"`
333 PossiblySensitive bool `json:"possibly_sensitive"`
334 RetweetCount string `json:"retweet_count"`
335 Retweeted bool `json:"retweeted"`
336 Source string `json:"source"`
337 Truncated bool `json:"truncated"`
338 } `json:"tweet"`
339 }
340
341 var tweets []*Tweet
342 fd, err := os.Open(source + "/tweet.js")
343 if err != nil {
344 elog.Fatal(err)
345 }
346 // skip past window.YTD.tweet.part0 =
347 fd.Seek(25, 0)
348 dec := json.NewDecoder(fd)
349 err = dec.Decode(&tweets)
350 if err != nil {
351 elog.Fatalf("error parsing json: %s", err)
352 }
353 fd.Close()
354 tweetmap := make(map[string]*Tweet)
355 for _, t := range tweets {
356 t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
357 tweetmap[t.Tweet.IdStr] = t
358 }
359 sort.Slice(tweets, func(i, j int) bool {
360 return tweets[i].date.Before(tweets[j].date)
361 })
362 havetwid := func(xid string) bool {
363 var id int64
364 row := stmtFindXonk.QueryRow(user.ID, xid)
365 err := row.Scan(&id)
366 if err == nil {
367 log.Printf("id = %v", id)
368 return true
369 }
370 return false
371 }
372 log.Printf("importing %v tweets", len(tweets))
373 for _, t := range tweets {
374 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
375 if havetwid(xid) {
376 continue
377 }
378
379 if t.Tweet.FavoriteCount == "0" || t.Tweet.FavoriteCount == "" {
380 log.Printf("skipping, unworthy tweet")
381 continue
382 }
383
384 what := "honk"
385 noise := ""
386 if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
387 t.convoy = parent.convoy
388 what = "tonk"
389 } else {
390 t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
391 if t.Tweet.InReplyToScreenName != "" {
392 noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
393 t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
394 what = "tonk"
395 }
396 }
397 audience := []string{thewholeworld}
398 honk := Honk{
399 UserID: user.ID,
400 Username: user.Name,
401 What: what,
402 Honker: user.URL,
403 XID: xid,
404 Date: t.date,
405 Format: "markdown",
406 Audience: audience,
407 Convoy: t.convoy,
408 Public: true,
409 Whofore: 2,
410 }
411 noise += t.Tweet.FullText
412 // unbelievable
413 noise = html.UnescapeString(noise)
414 for _, r := range t.Tweet.Entities.Urls {
415 noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
416 }
417 for _, m := range t.Tweet.Entities.Media {
418 u := m.MediaURL
419 idx := strings.LastIndexByte(u, '/')
420 u = u[idx+1:]
421 fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
422 data, err := ioutil.ReadFile(fname)
423 if err != nil {
424 elog.Printf("error reading media: %s", fname)
425 continue
426 }
427 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
428
429 fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
430 if err != nil {
431 elog.Printf("error saving media: %s", fname)
432 continue
433 }
434 donk := &Donk{
435 FileID: fileid,
436 }
437 honk.Donks = append(honk.Donks, donk)
438 noise = strings.Replace(noise, m.URL, "", -1)
439 }
440 for _, ht := range t.Tweet.Entities.Hashtags {
441 honk.Onts = append(honk.Onts, "#"+ht.Text)
442 }
443 honk.Noise = noise
444 err := savehonk(&honk)
445 log.Printf("honk saved %v -> %v", xid, err)
446 }
447}