all repos — honk @ a504f3774072be850266c2ecfe95e583daba38ff

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"encoding/json"
 20	"fmt"
 21	"html"
 22	"io/ioutil"
 23	"log"
 24	"os"
 25	"regexp"
 26	"sort"
 27	"strings"
 28	"time"
 29)
 30
 31func importMain(username, flavor, source string) {
 32	switch flavor {
 33	case "mastodon":
 34		importMastodon(username, source)
 35	case "twitter":
 36		importTwitter(username, source)
 37	default:
 38		log.Fatal("unknown source flavor")
 39	}
 40}
 41
 42type TootObject struct {
 43	Summary      string
 44	Content      string
 45	InReplyTo    string
 46	Conversation string
 47	Published    time.Time
 48	Tag          []struct {
 49		Type string
 50		Name string
 51	}
 52	Attachment []struct {
 53		Type      string
 54		MediaType string
 55		Url       string
 56		Name      string
 57	}
 58}
 59
 60type PlainTootObject TootObject
 61
 62func (obj *TootObject) UnmarshalJSON(b []byte) error {
 63	p := (*PlainTootObject)(obj)
 64	json.Unmarshal(b, p)
 65	return nil
 66}
 67
 68func importMastodon(username, source string) {
 69	user, err := butwhatabout(username)
 70	if err != nil {
 71		log.Fatal(err)
 72	}
 73	type Toot struct {
 74		Id     string
 75		Type   string
 76		To     []string
 77		Cc     []string
 78		Object TootObject
 79	}
 80	var outbox struct {
 81		OrderedItems []Toot
 82	}
 83	fd, err := os.Open(source + "/outbox.json")
 84	if err != nil {
 85		log.Fatal(err)
 86	}
 87	dec := json.NewDecoder(fd)
 88	err = dec.Decode(&outbox)
 89	if err != nil {
 90		log.Fatalf("error parsing json: %s", err)
 91	}
 92	fd.Close()
 93
 94	havetoot := func(xid string) bool {
 95		var id int64
 96		row := stmtFindXonk.QueryRow(user.ID, xid)
 97		err := row.Scan(&id)
 98		if err == nil {
 99			return true
100		}
101		return false
102	}
103
104	re_tootid := regexp.MustCompile("[^/]+$")
105	for _, item := range outbox.OrderedItems {
106		toot := item
107		if toot.Type != "Create" {
108			continue
109		}
110		tootid := re_tootid.FindString(toot.Id)
111		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
112		if havetoot(xid) {
113			continue
114		}
115		honk := Honk{
116			UserID:   user.ID,
117			What:     "honk",
118			Honker:   user.URL,
119			XID:      xid,
120			RID:      toot.Object.InReplyTo,
121			Date:     toot.Object.Published,
122			URL:      xid,
123			Audience: append(toot.To, toot.Cc...),
124			Noise:    toot.Object.Content,
125			Convoy:   toot.Object.Conversation,
126			Whofore:  2,
127			Format:   "html",
128			Precis:   toot.Object.Summary,
129		}
130		if honk.RID != "" {
131			honk.What = "tonk"
132		}
133		if !loudandproud(honk.Audience) {
134			honk.Whofore = 3
135		}
136		for _, att := range toot.Object.Attachment {
137			switch att.Type {
138			case "Document":
139				fname := fmt.Sprintf("%s/%s", source, att.Url)
140				data, err := ioutil.ReadFile(fname)
141				if err != nil {
142					log.Printf("error reading media: %s", fname)
143					continue
144				}
145				u := xfiltrate()
146				name := att.Name
147				desc := name
148				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
149				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
150				if err != nil {
151					log.Printf("error saving media: %s", fname)
152					continue
153				}
154				donk := &Donk{
155					FileID: fileid,
156				}
157				honk.Donks = append(honk.Donks, donk)
158			}
159		}
160		for _, t := range toot.Object.Tag {
161			switch t.Type {
162			case "Hashtag":
163				honk.Onts = append(honk.Onts, t.Name)
164			}
165		}
166		savehonk(&honk)
167	}
168}
169
170func importTwitter(username, source string) {
171	user, err := butwhatabout(username)
172	if err != nil {
173		log.Fatal(err)
174	}
175
176	type Tweet struct {
177		ID_str                  string
178		Created_at              string
179		Full_text               string
180		In_reply_to_screen_name string
181		In_reply_to_status_id   string
182		Entities                struct {
183			Hashtags []struct {
184				Text string
185			}
186			Media []struct {
187				Url       string
188				Media_url string
189			}
190			Urls []struct {
191				Url          string
192				Expanded_url string
193			}
194		}
195		date   time.Time
196		convoy string
197	}
198
199	var tweets []*Tweet
200	fd, err := os.Open(source + "/tweet.js")
201	if err != nil {
202		log.Fatal(err)
203	}
204	// skip past window.YTD.tweet.part0 =
205	fd.Seek(25, 0)
206	dec := json.NewDecoder(fd)
207	err = dec.Decode(&tweets)
208	if err != nil {
209		log.Fatalf("error parsing json: %s", err)
210	}
211	fd.Close()
212	tweetmap := make(map[string]*Tweet)
213	for _, t := range tweets {
214		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
215		tweetmap[t.ID_str] = t
216	}
217	sort.Slice(tweets, func(i, j int) bool {
218		return tweets[i].date.Before(tweets[j].date)
219	})
220	havetwid := func(xid string) bool {
221		var id int64
222		row := stmtFindXonk.QueryRow(user.ID, xid)
223		err := row.Scan(&id)
224		if err == nil {
225			return true
226		}
227		return false
228	}
229
230	for _, t := range tweets {
231		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
232		if havetwid(xid) {
233			continue
234		}
235		what := "honk"
236		noise := ""
237		if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
238			t.convoy = parent.convoy
239			what = "tonk"
240		} else {
241			t.convoy = "data:,acoustichonkytonk-" + t.ID_str
242			if t.In_reply_to_screen_name != "" {
243				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
244					t.In_reply_to_screen_name, t.In_reply_to_status_id)
245				what = "tonk"
246			}
247		}
248		audience := []string{thewholeworld}
249		honk := Honk{
250			UserID:   user.ID,
251			Username: user.Name,
252			What:     what,
253			Honker:   user.URL,
254			XID:      xid,
255			Date:     t.date,
256			Format:   "markdown",
257			Audience: audience,
258			Convoy:   t.convoy,
259			Public:   true,
260			Whofore:  2,
261		}
262		noise += t.Full_text
263		// unbelievable
264		noise = html.UnescapeString(noise)
265		for _, r := range t.Entities.Urls {
266			noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
267		}
268		for _, m := range t.Entities.Media {
269			u := m.Media_url
270			idx := strings.LastIndexByte(u, '/')
271			u = u[idx+1:]
272			fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
273			data, err := ioutil.ReadFile(fname)
274			if err != nil {
275				log.Printf("error reading media: %s", fname)
276				continue
277			}
278			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
279
280			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
281			if err != nil {
282				log.Printf("error saving media: %s", fname)
283				continue
284			}
285			donk := &Donk{
286				FileID: fileid,
287			}
288			honk.Donks = append(honk.Donks, donk)
289			noise = strings.Replace(noise, m.Url, "", -1)
290		}
291		for _, ht := range t.Entities.Hashtags {
292			honk.Onts = append(honk.Onts, "#"+ht.Text)
293		}
294		honk.Noise = noise
295		savehonk(&honk)
296	}
297}