//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
16package main
17
18import (
19 "encoding/json"
20 "fmt"
21 "html"
22 "io/ioutil"
23 "os"
24 "regexp"
25 "sort"
26 "strings"
27 "time"
28)
29
30func importMain(username, flavor, source string) {
31 switch flavor {
32 case "mastodon":
33 importMastodon(username, source)
34 case "twitter":
35 importTwitter(username, source)
36 default:
37 elog.Fatal("unknown source flavor")
38 }
39}
40
// TootObject holds the fields read from the "object" member of an outbox
// activity. Field names are matched against JSON keys by encoding/json's
// default (case-insensitive) rules; no struct tags are used.
type TootObject struct {
	Summary      string
	Content      string
	InReplyTo    string
	Conversation string
	Published    time.Time
	// Tag lists the tags attached to the post.
	Tag []struct {
		Type string
		Name string
	}
	// Attachment lists the media entries attached to the post.
	Attachment []struct {
		Type      string
		MediaType string
		Url       string
		Name      string
	}
}

// PlainTootObject has the same layout as TootObject but none of its
// methods, so decoding into it uses the default struct decoding instead of
// re-entering TootObject.UnmarshalJSON.
type PlainTootObject TootObject

// UnmarshalJSON decodes b into obj on a best-effort basis and always
// returns nil: whatever fields could be decoded are kept and any decoding
// error is dropped.
// NOTE(review): presumably this lets outbox entries whose object is not a
// JSON object (for example a bare string) pass through without aborting
// the whole decode - confirm before tightening.
func (obj *TootObject) UnmarshalJSON(b []byte) error {
	// The error is discarded on purpose; see the note above.
	_ = json.Unmarshal(b, (*PlainTootObject)(obj))
	return nil
}
66
67func importMastodon(username, source string) {
68 user, err := butwhatabout(username)
69 if err != nil {
70 elog.Fatal(err)
71 }
72 type Toot struct {
73 Id string
74 Type string
75 To []string
76 Cc []string
77 Object TootObject
78 }
79 var outbox struct {
80 OrderedItems []Toot
81 }
82 fd, err := os.Open(source + "/outbox.json")
83 if err != nil {
84 elog.Fatal(err)
85 }
86 dec := json.NewDecoder(fd)
87 err = dec.Decode(&outbox)
88 if err != nil {
89 elog.Fatalf("error parsing json: %s", err)
90 }
91 fd.Close()
92
93 havetoot := func(xid string) bool {
94 var id int64
95 row := stmtFindXonk.QueryRow(user.ID, xid)
96 err := row.Scan(&id)
97 if err == nil {
98 return true
99 }
100 return false
101 }
102
103 re_tootid := regexp.MustCompile("[^/]+$")
104 for _, item := range outbox.OrderedItems {
105 toot := item
106 if toot.Type != "Create" {
107 continue
108 }
109 if strings.HasSuffix(toot.Id, "/activity") {
110 toot.Id = strings.TrimSuffix(toot.Id, "/activity")
111 }
112 tootid := re_tootid.FindString(toot.Id)
113 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
114 if havetoot(xid) {
115 continue
116 }
117 honk := Honk{
118 UserID: user.ID,
119 What: "honk",
120 Honker: user.URL,
121 XID: xid,
122 RID: toot.Object.InReplyTo,
123 Date: toot.Object.Published,
124 URL: xid,
125 Audience: append(toot.To, toot.Cc...),
126 Noise: toot.Object.Content,
127 Convoy: toot.Object.Conversation,
128 Whofore: 2,
129 Format: "html",
130 Precis: toot.Object.Summary,
131 }
132 if honk.RID != "" {
133 honk.What = "tonk"
134 }
135 if !loudandproud(honk.Audience) {
136 honk.Whofore = 3
137 }
138 for _, att := range toot.Object.Attachment {
139 switch att.Type {
140 case "Document":
141 fname := fmt.Sprintf("%s/%s", source, att.Url)
142 data, err := ioutil.ReadFile(fname)
143 if err != nil {
144 elog.Printf("error reading media: %s", fname)
145 continue
146 }
147 u := xfiltrate()
148 name := att.Name
149 desc := name
150 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
151 fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
152 if err != nil {
153 elog.Printf("error saving media: %s", fname)
154 continue
155 }
156 donk := &Donk{
157 FileID: fileid,
158 }
159 honk.Donks = append(honk.Donks, donk)
160 }
161 }
162 for _, t := range toot.Object.Tag {
163 switch t.Type {
164 case "Hashtag":
165 honk.Onts = append(honk.Onts, t.Name)
166 }
167 }
168 savehonk(&honk)
169 }
170}
171
172func importTwitter(username, source string) {
173 user, err := butwhatabout(username)
174 if err != nil {
175 elog.Fatal(err)
176 }
177
178 type Tweet struct {
179 ID_str string
180 Created_at string
181 Full_text string
182 In_reply_to_screen_name string
183 In_reply_to_status_id string
184 Entities struct {
185 Hashtags []struct {
186 Text string
187 }
188 Media []struct {
189 Url string
190 Media_url string
191 }
192 Urls []struct {
193 Url string
194 Expanded_url string
195 }
196 }
197 date time.Time
198 convoy string
199 }
200
201 var tweets []*Tweet
202 fd, err := os.Open(source + "/tweet.js")
203 if err != nil {
204 elog.Fatal(err)
205 }
206 // skip past window.YTD.tweet.part0 =
207 fd.Seek(25, 0)
208 dec := json.NewDecoder(fd)
209 err = dec.Decode(&tweets)
210 if err != nil {
211 elog.Fatalf("error parsing json: %s", err)
212 }
213 fd.Close()
214 tweetmap := make(map[string]*Tweet)
215 for _, t := range tweets {
216 t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
217 tweetmap[t.ID_str] = t
218 }
219 sort.Slice(tweets, func(i, j int) bool {
220 return tweets[i].date.Before(tweets[j].date)
221 })
222 havetwid := func(xid string) bool {
223 var id int64
224 row := stmtFindXonk.QueryRow(user.ID, xid)
225 err := row.Scan(&id)
226 if err == nil {
227 return true
228 }
229 return false
230 }
231
232 for _, t := range tweets {
233 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
234 if havetwid(xid) {
235 continue
236 }
237 what := "honk"
238 noise := ""
239 if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
240 t.convoy = parent.convoy
241 what = "tonk"
242 } else {
243 t.convoy = "data:,acoustichonkytonk-" + t.ID_str
244 if t.In_reply_to_screen_name != "" {
245 noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
246 t.In_reply_to_screen_name, t.In_reply_to_status_id)
247 what = "tonk"
248 }
249 }
250 audience := []string{thewholeworld}
251 honk := Honk{
252 UserID: user.ID,
253 Username: user.Name,
254 What: what,
255 Honker: user.URL,
256 XID: xid,
257 Date: t.date,
258 Format: "markdown",
259 Audience: audience,
260 Convoy: t.convoy,
261 Public: true,
262 Whofore: 2,
263 }
264 noise += t.Full_text
265 // unbelievable
266 noise = html.UnescapeString(noise)
267 for _, r := range t.Entities.Urls {
268 noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
269 }
270 for _, m := range t.Entities.Media {
271 u := m.Media_url
272 idx := strings.LastIndexByte(u, '/')
273 u = u[idx+1:]
274 fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
275 data, err := ioutil.ReadFile(fname)
276 if err != nil {
277 elog.Printf("error reading media: %s", fname)
278 continue
279 }
280 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
281
282 fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
283 if err != nil {
284 elog.Printf("error saving media: %s", fname)
285 continue
286 }
287 donk := &Donk{
288 FileID: fileid,
289 }
290 honk.Donks = append(honk.Donks, donk)
291 noise = strings.Replace(noise, m.Url, "", -1)
292 }
293 for _, ht := range t.Entities.Hashtags {
294 honk.Onts = append(honk.Onts, "#"+ht.Text)
295 }
296 honk.Noise = noise
297 savehonk(&honk)
298 }
299}