import.go (view raw)
1//
2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
3//
4// Permission to use, copy, modify, and distribute this software for any
5// purpose with or without fee is hereby granted, provided that the above
6// copyright notice and this permission notice appear in all copies.
7//
8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
16package main
17
18import (
19 "encoding/csv"
20 "encoding/json"
21 "fmt"
22 "html"
23 "io/ioutil"
24 "os"
25 "regexp"
26 "sort"
27 "strings"
28 "time"
29)
30
31func importMain(username, flavor, source string) {
32 switch flavor {
33 case "mastodon":
34 importMastodon(username, source)
35 case "twitter":
36 importTwitter(username, source)
37 default:
38 elog.Fatal("unknown source flavor")
39 }
40}
41
// TootObject holds the fields that are read from the object of an outbox
// activity.
type TootObject struct {
	Summary, Content        string
	InReplyTo, Conversation string
	Published               time.Time

	// Tag lists the tags attached to the post.
	Tag []struct {
		Type, Name string
	}

	// Attachment lists the media entries attached to the post.
	Attachment []struct {
		Type, MediaType string
		Url, Name       string
	}
}

// PlainTootObject has the same fields as TootObject but none of its methods,
// so decoding into it uses encoding/json's default struct handling rather
// than calling TootObject.UnmarshalJSON again.
type PlainTootObject TootObject

// UnmarshalJSON decodes b into obj. Any decoding error is discarded and nil
// is always returned.
// NOTE(review): presumably this keeps an activity whose object is not a JSON
// object from failing the decode of the whole enclosing document -- confirm.
func (obj *TootObject) UnmarshalJSON(b []byte) error {
	plain := (*PlainTootObject)(obj)
	_ = json.Unmarshal(b, plain) // error is dropped, see the doc comment
	return nil
}
67
68func importMastodon(username, source string) {
69 user, err := butwhatabout(username)
70 if err != nil {
71 elog.Fatal(err)
72 }
73
74 if _, err := os.Stat(source + "/outbox.json"); err == nil {
75 importMastotoots(user, source)
76 } else {
77 ilog.Printf("skipping outbox.json!")
78 }
79 if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
80 importMastotooters(user, source)
81 } else {
82 ilog.Printf("skipping following_accounts.csv!")
83 }
84}
85
86func importMastotoots(user *WhatAbout, source string) {
87 type Toot struct {
88 Id string
89 Type string
90 To []string
91 Cc []string
92 Object TootObject
93 }
94 var outbox struct {
95 OrderedItems []Toot
96 }
97 ilog.Println("Importing honks...")
98 fd, err := os.Open(source + "/outbox.json")
99 if err != nil {
100 elog.Fatal(err)
101 }
102 dec := json.NewDecoder(fd)
103 err = dec.Decode(&outbox)
104 if err != nil {
105 elog.Fatalf("error parsing json: %s", err)
106 }
107 fd.Close()
108
109 havetoot := func(xid string) bool {
110 var id int64
111 row := stmtFindXonk.QueryRow(user.ID, xid)
112 err := row.Scan(&id)
113 if err == nil {
114 return true
115 }
116 return false
117 }
118
119 re_tootid := regexp.MustCompile("[^/]+$")
120 for _, item := range outbox.OrderedItems {
121 toot := item
122 if toot.Type != "Create" {
123 continue
124 }
125 if strings.HasSuffix(toot.Id, "/activity") {
126 toot.Id = strings.TrimSuffix(toot.Id, "/activity")
127 }
128 tootid := re_tootid.FindString(toot.Id)
129 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
130 if havetoot(xid) {
131 continue
132 }
133 honk := Honk{
134 UserID: user.ID,
135 What: "honk",
136 Honker: user.URL,
137 XID: xid,
138 RID: toot.Object.InReplyTo,
139 Date: toot.Object.Published,
140 URL: xid,
141 Audience: append(toot.To, toot.Cc...),
142 Noise: toot.Object.Content,
143 Convoy: toot.Object.Conversation,
144 Whofore: 2,
145 Format: "html",
146 Precis: toot.Object.Summary,
147 }
148 if honk.RID != "" {
149 honk.What = "tonk"
150 }
151 if !loudandproud(honk.Audience) {
152 honk.Whofore = 3
153 }
154 for _, att := range toot.Object.Attachment {
155 switch att.Type {
156 case "Document":
157 fname := fmt.Sprintf("%s/%s", source, att.Url)
158 data, err := ioutil.ReadFile(fname)
159 if err != nil {
160 elog.Printf("error reading media: %s", fname)
161 continue
162 }
163 u := xfiltrate()
164 name := att.Name
165 desc := name
166 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
167 fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
168 if err != nil {
169 elog.Printf("error saving media: %s", fname)
170 continue
171 }
172 donk := &Donk{
173 FileID: fileid,
174 }
175 honk.Donks = append(honk.Donks, donk)
176 }
177 }
178 for _, t := range toot.Object.Tag {
179 switch t.Type {
180 case "Hashtag":
181 honk.Onts = append(honk.Onts, t.Name)
182 }
183 }
184 savehonk(&honk)
185 }
186}
187
188func importMastotooters(user *WhatAbout, source string) {
189 ilog.Println("Importing honkers...")
190 fd, err := os.Open(source + "/following_accounts.csv")
191 if err != nil {
192 elog.Fatal(err)
193 }
194 r := csv.NewReader(fd)
195 data, err := r.ReadAll()
196 if err != nil {
197 elog.Fatal(err)
198 }
199 fd.Close()
200
201 var meta HonkerMeta
202 mj, _ := jsonify(&meta)
203
204 for i, d := range data {
205 if i == 0 {
206 continue
207 }
208 url := "@" + d[0]
209 name := ""
210 flavor := "peep"
211 combos := ""
212 err := savehonker(user, url, name, flavor, combos, mj)
213 if err != nil {
214 elog.Printf("trouble with a honker: %s", err)
215 }
216 }
217}
218
219func importTwitter(username, source string) {
220 user, err := butwhatabout(username)
221 if err != nil {
222 elog.Fatal(err)
223 }
224
225 type Tweet struct {
226 ID_str string
227 Created_at string
228 Full_text string
229 In_reply_to_screen_name string
230 In_reply_to_status_id string
231 Entities struct {
232 Hashtags []struct {
233 Text string
234 }
235 Media []struct {
236 Url string
237 Media_url string
238 }
239 Urls []struct {
240 Url string
241 Expanded_url string
242 }
243 }
244 date time.Time
245 convoy string
246 }
247
248 var tweets []*Tweet
249 fd, err := os.Open(source + "/tweet.js")
250 if err != nil {
251 elog.Fatal(err)
252 }
253 // skip past window.YTD.tweet.part0 =
254 fd.Seek(25, 0)
255 dec := json.NewDecoder(fd)
256 err = dec.Decode(&tweets)
257 if err != nil {
258 elog.Fatalf("error parsing json: %s", err)
259 }
260 fd.Close()
261 tweetmap := make(map[string]*Tweet)
262 for _, t := range tweets {
263 t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
264 tweetmap[t.ID_str] = t
265 }
266 sort.Slice(tweets, func(i, j int) bool {
267 return tweets[i].date.Before(tweets[j].date)
268 })
269 havetwid := func(xid string) bool {
270 var id int64
271 row := stmtFindXonk.QueryRow(user.ID, xid)
272 err := row.Scan(&id)
273 if err == nil {
274 return true
275 }
276 return false
277 }
278
279 for _, t := range tweets {
280 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
281 if havetwid(xid) {
282 continue
283 }
284 what := "honk"
285 noise := ""
286 if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
287 t.convoy = parent.convoy
288 what = "tonk"
289 } else {
290 t.convoy = "data:,acoustichonkytonk-" + t.ID_str
291 if t.In_reply_to_screen_name != "" {
292 noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
293 t.In_reply_to_screen_name, t.In_reply_to_status_id)
294 what = "tonk"
295 }
296 }
297 audience := []string{thewholeworld}
298 honk := Honk{
299 UserID: user.ID,
300 Username: user.Name,
301 What: what,
302 Honker: user.URL,
303 XID: xid,
304 Date: t.date,
305 Format: "markdown",
306 Audience: audience,
307 Convoy: t.convoy,
308 Public: true,
309 Whofore: 2,
310 }
311 noise += t.Full_text
312 // unbelievable
313 noise = html.UnescapeString(noise)
314 for _, r := range t.Entities.Urls {
315 noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
316 }
317 for _, m := range t.Entities.Media {
318 u := m.Media_url
319 idx := strings.LastIndexByte(u, '/')
320 u = u[idx+1:]
321 fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
322 data, err := ioutil.ReadFile(fname)
323 if err != nil {
324 elog.Printf("error reading media: %s", fname)
325 continue
326 }
327 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
328
329 fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
330 if err != nil {
331 elog.Printf("error saving media: %s", fname)
332 continue
333 }
334 donk := &Donk{
335 FileID: fileid,
336 }
337 honk.Donks = append(honk.Donks, donk)
338 noise = strings.Replace(noise, m.Url, "", -1)
339 }
340 for _, ht := range t.Entities.Hashtags {
341 honk.Onts = append(honk.Onts, "#"+ht.Text)
342 }
343 honk.Noise = noise
344 savehonk(&honk)
345 }
346}