all repos — honk @ 8f557f8374725f7df73656083cd92d45d907896a

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"encoding/json"
 20	"fmt"
 21	"html"
 22	"io/ioutil"
 23	"log"
 24	"os"
 25	"regexp"
 26	"sort"
 27	"strings"
 28	"time"
 29)
 30
 31func importMain(username, flavor, source string) {
 32	switch flavor {
 33	case "mastodon":
 34		importMastodon(username, source)
 35	case "twitter":
 36		importTwitter(username, source)
 37	default:
 38		log.Fatal("unknown source flavor")
 39	}
 40}
 41
 42func importMastodon(username, source string) {
 43	user, err := butwhatabout(username)
 44	if err != nil {
 45		log.Fatal(err)
 46	}
 47	type Toot struct {
 48		Id           string
 49		Type         string
 50		To           []string
 51		Cc           []string
 52		Summary      string
 53		Content      string
 54		InReplyTo    string
 55		Conversation string
 56		Published    time.Time
 57		Tag          []struct {
 58			Type string
 59			Name string
 60		}
 61		Attachment []struct {
 62			Type      string
 63			MediaType string
 64			Url       string
 65			Name      string
 66		}
 67	}
 68	var outbox struct {
 69		OrderedItems []struct {
 70			Object Toot
 71		}
 72	}
 73	fd, err := os.Open(source + "/outbox.json")
 74	if err != nil {
 75		log.Fatal(err)
 76	}
 77	dec := json.NewDecoder(fd)
 78	err = dec.Decode(&outbox)
 79	if err != nil {
 80		log.Fatalf("error parsing json: %s", err)
 81	}
 82	fd.Close()
 83
 84	havetoot := func(xid string) bool {
 85		var id int64
 86		row := stmtFindXonk.QueryRow(user.ID, xid)
 87		err := row.Scan(&id)
 88		if err == nil {
 89			return true
 90		}
 91		return false
 92	}
 93
 94	re_tootid := regexp.MustCompile("[^/]+$")
 95	for _, item := range outbox.OrderedItems {
 96		toot := item.Object
 97		tootid := re_tootid.FindString(toot.Id)
 98		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
 99		if havetoot(xid) {
100			continue
101		}
102		honk := Honk{
103			UserID:   user.ID,
104			What:     "honk",
105			Honker:   user.URL,
106			XID:      xid,
107			RID:      toot.InReplyTo,
108			Date:     toot.Published,
109			URL:      xid,
110			Audience: append(toot.To, toot.Cc...),
111			Noise:    toot.Content,
112			Convoy:   toot.Conversation,
113			Whofore:  2,
114			Format:   "html",
115			Precis:   toot.Summary,
116		}
117		if honk.RID != "" {
118			honk.What = "tonk"
119		}
120		if !loudandproud(honk.Audience) {
121			honk.Whofore = 3
122		}
123		for _, att := range toot.Attachment {
124			switch att.Type {
125			case "Document":
126				fname := fmt.Sprintf("%s/%s", source, att.Url)
127				data, err := ioutil.ReadFile(fname)
128				if err != nil {
129					log.Printf("error reading media: %s", fname)
130					continue
131				}
132				u := xfiltrate()
133				name := att.Name
134				desc := name
135				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
136				fileid, err := savefile(u, name, desc, newurl, att.MediaType, true, data)
137				if err != nil {
138					log.Printf("error saving media: %s", fname)
139					continue
140				}
141				donk := &Donk{
142					FileID: fileid,
143				}
144				honk.Donks = append(honk.Donks, donk)
145			}
146		}
147		for _, t := range toot.Tag {
148			switch t.Type {
149			case "Hashtag":
150				honk.Onts = append(honk.Onts, t.Name)
151			}
152		}
153		savehonk(&honk)
154	}
155}
156
157func importTwitter(username, source string) {
158	user, err := butwhatabout(username)
159	if err != nil {
160		log.Fatal(err)
161	}
162
163	type Tweet struct {
164		ID_str                  string
165		Created_at              string
166		Full_text               string
167		In_reply_to_screen_name string
168		In_reply_to_status_id   string
169		Entities                struct {
170			Hashtags []struct {
171				Text string
172			}
173			Media []struct {
174				Url       string
175				Media_url string
176			}
177			Urls []struct {
178				Url          string
179				Expanded_url string
180			}
181		}
182		date   time.Time
183		convoy string
184	}
185
186	var tweets []*Tweet
187	fd, err := os.Open(source + "/tweet.js")
188	if err != nil {
189		log.Fatal(err)
190	}
191	// skip past window.YTD.tweet.part0 =
192	fd.Seek(25, 0)
193	dec := json.NewDecoder(fd)
194	err = dec.Decode(&tweets)
195	if err != nil {
196		log.Fatalf("error parsing json: %s", err)
197	}
198	fd.Close()
199	tweetmap := make(map[string]*Tweet)
200	for _, t := range tweets {
201		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
202		tweetmap[t.ID_str] = t
203	}
204	sort.Slice(tweets, func(i, j int) bool {
205		return tweets[i].date.Before(tweets[j].date)
206	})
207	havetwid := func(xid string) bool {
208		var id int64
209		row := stmtFindXonk.QueryRow(user.ID, xid)
210		err := row.Scan(&id)
211		if err == nil {
212			return true
213		}
214		return false
215	}
216
217	for _, t := range tweets {
218		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
219		if havetwid(xid) {
220			continue
221		}
222		what := "honk"
223		noise := ""
224		if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
225			t.convoy = parent.convoy
226			what = "tonk"
227		} else {
228			t.convoy = "data:,acoustichonkytonk-" + t.ID_str
229			if t.In_reply_to_screen_name != "" {
230				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
231					t.In_reply_to_screen_name, t.In_reply_to_status_id)
232				what = "tonk"
233			}
234		}
235		audience := []string{thewholeworld}
236		honk := Honk{
237			UserID:   user.ID,
238			Username: user.Name,
239			What:     what,
240			Honker:   user.URL,
241			XID:      xid,
242			Date:     t.date,
243			Format:   "markdown",
244			Audience: audience,
245			Convoy:   t.convoy,
246			Public:   true,
247			Whofore:  2,
248		}
249		noise += t.Full_text
250		// unbelievable
251		noise = html.UnescapeString(noise)
252		for _, r := range t.Entities.Urls {
253			noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
254		}
255		for _, m := range t.Entities.Media {
256			u := m.Media_url
257			idx := strings.LastIndexByte(u, '/')
258			u = u[idx+1:]
259			fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
260			data, err := ioutil.ReadFile(fname)
261			if err != nil {
262				log.Printf("error reading media: %s", fname)
263				continue
264			}
265			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
266
267			fileid, err := savefile(u, u, u, newurl, "image/jpg", true, data)
268			if err != nil {
269				log.Printf("error saving media: %s", fname)
270				continue
271			}
272			donk := &Donk{
273				FileID: fileid,
274			}
275			honk.Donks = append(honk.Donks, donk)
276			noise = strings.Replace(noise, m.Url, "", -1)
277		}
278		for _, ht := range t.Entities.Hashtags {
279			honk.Onts = append(honk.Onts, "#"+ht.Text)
280		}
281		honk.Noise = noise
282		savehonk(&honk)
283	}
284}