import.go (view raw)
1//
2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
3//
4// Permission to use, copy, modify, and distribute this software for any
5// purpose with or without fee is hereby granted, provided that the above
6// copyright notice and this permission notice appear in all copies.
7//
8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
16package main
17
18import (
19 "encoding/json"
20 "fmt"
21 "html"
22 "io/ioutil"
23 "log"
24 "os"
25 "regexp"
26 "sort"
27 "strings"
28 "time"
29
30 "humungus.tedunangst.com/r/webs/htfilter"
31)
32
33func importMain(username, flavor, source string) {
34 switch flavor {
35 case "mastodon":
36 importMastodon(username, source)
37 case "twitter":
38 importTwitter(username, source)
39 default:
40 log.Fatal("unknown source flavor")
41 }
42}
43
44func importMastodon(username, source string) {
45 user, err := butwhatabout(username)
46 if err != nil {
47 log.Fatal(err)
48 }
49 type Toot struct {
50 Id string
51 Type string
52 To []string
53 Cc []string
54 Summary string
55 Content string
56 InReplyTo string
57 Conversation string
58 Published time.Time
59 Tag []struct {
60 Type string
61 Name string
62 }
63 Attachment []struct {
64 Type string
65 MediaType string
66 Url string
67 Name string
68 }
69 }
70 var outbox struct {
71 OrderedItems []struct {
72 Object Toot
73 }
74 }
75 fd, err := os.Open(source + "/outbox.json")
76 if err != nil {
77 log.Fatal(err)
78 }
79 dec := json.NewDecoder(fd)
80 err = dec.Decode(&outbox)
81 if err != nil {
82 log.Fatalf("error parsing json: %s", err)
83 }
84 fd.Close()
85
86 havetoot := func(xid string) bool {
87 var id int64
88 row := stmtFindXonk.QueryRow(user.ID, xid)
89 err := row.Scan(&id)
90 if err == nil {
91 return true
92 }
93 return false
94 }
95
96 re_tootid := regexp.MustCompile("[^/]+$")
97 for _, item := range outbox.OrderedItems {
98 toot := item.Object
99 tootid := re_tootid.FindString(toot.Id)
100 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
101 if havetoot(xid) {
102 continue
103 }
104 honk := Honk{
105 UserID: user.ID,
106 What: "honk",
107 Honker: user.URL,
108 XID: xid,
109 RID: toot.InReplyTo,
110 Date: toot.Published,
111 URL: xid,
112 Audience: append(toot.To, toot.Cc...),
113 Noise: toot.Content,
114 Convoy: toot.Conversation,
115 Whofore: 2,
116 Format: "html",
117 Precis: toot.Summary,
118 }
119 if honk.RID != "" {
120 honk.What = "tonk"
121 }
122 if !loudandproud(honk.Audience) {
123 honk.Whofore = 3
124 }
125 for _, att := range toot.Attachment {
126 switch att.Type {
127 case "Document":
128 fname := fmt.Sprintf("%s/%s", source, att.Url)
129 data, err := ioutil.ReadFile(fname)
130 if err != nil {
131 log.Printf("error reading media: %s", fname)
132 continue
133 }
134 u := xfiltrate()
135 name := att.Name
136 desc := name
137 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
138 fileid, err := savefile(u, name, desc, newurl, att.MediaType, true, data)
139 if err != nil {
140 log.Printf("error saving media: %s", fname)
141 continue
142 }
143 donk := &Donk{
144 FileID: fileid,
145 }
146 honk.Donks = append(honk.Donks, donk)
147 }
148 }
149 for _, t := range toot.Tag {
150 switch t.Type {
151 case "Hashtag":
152 honk.Onts = append(honk.Onts, t.Name)
153 }
154 }
155 savehonk(&honk)
156 }
157}
158
159func importTwitter(username, source string) {
160 user, err := butwhatabout(username)
161 if err != nil {
162 log.Fatal(err)
163 }
164
165 type Tweet struct {
166 ID_str string
167 Created_at string
168 Full_text string
169 In_reply_to_screen_name string
170 In_reply_to_status_id string
171 Entities struct {
172 Hashtags []struct {
173 Text string
174 }
175 Media []struct {
176 Url string
177 Media_url string
178 }
179 Urls []struct {
180 Url string
181 Expanded_url string
182 }
183 }
184 date time.Time
185 convoy string
186 }
187
188 var tweets []*Tweet
189 fd, err := os.Open(source + "/tweet.js")
190 if err != nil {
191 log.Fatal(err)
192 }
193 // skip past window.YTD.tweet.part0 =
194 fd.Seek(25, 0)
195 dec := json.NewDecoder(fd)
196 err = dec.Decode(&tweets)
197 if err != nil {
198 log.Fatalf("error parsing json: %s", err)
199 }
200 fd.Close()
201 tweetmap := make(map[string]*Tweet)
202 for _, t := range tweets {
203 t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
204 tweetmap[t.ID_str] = t
205 }
206 sort.Slice(tweets, func(i, j int) bool {
207 return tweets[i].date.Before(tweets[j].date)
208 })
209 havetwid := func(xid string) bool {
210 var id int64
211 row := stmtFindXonk.QueryRow(user.ID, xid)
212 err := row.Scan(&id)
213 if err == nil {
214 return true
215 }
216 return false
217 }
218
219 for _, t := range tweets {
220 xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
221 if havetwid(xid) {
222 continue
223 }
224 what := "honk"
225 noise := ""
226 if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
227 t.convoy = parent.convoy
228 what = "tonk"
229 } else {
230 t.convoy = "data:,acoustichonkytonk-" + t.ID_str
231 if t.In_reply_to_screen_name != "" {
232 noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
233 t.In_reply_to_screen_name, t.In_reply_to_status_id)
234 what = "tonk"
235 }
236 }
237 audience := []string{thewholeworld}
238 honk := Honk{
239 UserID: user.ID,
240 Username: user.Name,
241 What: what,
242 Honker: user.URL,
243 XID: xid,
244 Date: t.date,
245 Format: "markdown",
246 Audience: audience,
247 Convoy: t.convoy,
248 Public: true,
249 Whofore: 2,
250 }
251 noise += t.Full_text
252 // unbelievable
253 noise = html.UnescapeString(noise)
254 for _, r := range t.Entities.Urls {
255 noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
256 }
257 for _, m := range t.Entities.Media {
258 u := m.Media_url
259 idx := strings.LastIndexByte(u, '/')
260 u = u[idx+1:]
261 fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
262 data, err := ioutil.ReadFile(fname)
263 if err != nil {
264 log.Printf("error reading media: %s", fname)
265 continue
266 }
267 newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
268
269 fileid, err := savefile(u, u, u, newurl, "image/jpg", true, data)
270 if err != nil {
271 log.Printf("error saving media: %s", fname)
272 continue
273 }
274 donk := &Donk{
275 FileID: fileid,
276 }
277 honk.Donks = append(honk.Donks, donk)
278 noise = strings.Replace(noise, m.Url, "", -1)
279 }
280 for _, ht := range t.Entities.Hashtags {
281 honk.Onts = append(honk.Onts, "#"+ht.Text)
282 }
283 honk.Noise = noise
284 savehonk(&honk)
285 }
286}