Answer the question
In order to leave comments, you need to log in
How to parse yml file with Windows-1251 encoding?
There is a file in yml format on a remote FTP server, and I need to parse it correctly. The code below works for UTF-8 encoding, but not for Windows-1251. What am I doing wrong?
// DownloadPowerFile retrieves the file named url from the /U16-user/ directory
// of the FTP server configured in globals.GlobalSettings and unmarshals it
// into a PowerCatalog.
//
// The <offers> and </offers> tags are stripped from the document before it is
// unmarshalled. The returned pointer is never nil: when any step fails the
// error is printed and the catalog is returned with whatever was decoded up to
// that point (empty if the download itself failed).
//
// NOTE(review): xml.Unmarshal expects UTF-8 input. A document that declares
// encoding="windows-1251" needs an xml.Decoder with CharsetReader set instead.
func DownloadPowerFile(url string) *PowerCatalog {
	list := PowerCatalog{}

	ftpClient, err := ftp.Dial(globals.GlobalSettings.FtpAddress)
	if err != nil {
		fmt.Println(err)
		return &list
	}
	// Registered only after a successful Dial, so Quit is never called on a
	// client that failed to connect.
	defer ftpClient.Quit()

	if err = ftpClient.Login(globals.GlobalSettings.FtpUser, globals.GlobalSettings.FtpPassword); err != nil {
		fmt.Println(err)
		return &list
	}
	if err = ftpClient.ChangeDir("/U16-user/"); err != nil {
		fmt.Println(err)
		return &list
	}

	// Retr issues the FTP RETR command to fetch the named file from the server.
	resp, err := ftpClient.Retr(url)
	if err != nil {
		fmt.Println(err)
		return &list
	}
	// Deferred calls run last-in-first-out, so the data connection is closed
	// before Quit ends the session.
	defer resp.Close()

	body, err := ioutil.ReadAll(resp)
	if err != nil {
		fmt.Println(err)
		return &list
	}

	// Drop the <offers> wrapper so its children unmarshal directly into list.
	newbody := strings.Replace(string(body), "<offers>", "", -1)
	newbody = strings.Replace(newbody, "</offers>", "", -1)

	if err = xml.Unmarshal([]byte(newbody), &list); err != nil {
		fmt.Println(err)
		return &list
	}
	fmt.Println("OKOK")
	return &list
}
Answer the question
In order to leave comments, you need to log in
The problem was solved in a different way; the method is shown below. We did the conversion with the Linux iconv command.
// DownloadPowerFile downloads the catalog at url, re-encodes it from cp1251 to
// UTF-8 with the external iconv tool, removes the encoding="windows-1251"
// attribute with sed, and unmarshals the result into a PowerCatalog.
//
// The returned pointer is never nil. If any step fails the problem is printed
// and the catalog is returned with whatever was decoded up to that point
// (empty when the download or conversion failed).
func DownloadPowerFile(url string) *PowerCatalog {
	list := PowerCatalog{}

	// Download the file from the link and save it into the files folder.
	err := DownloadFilePower("Powerplant_Talapai.yml", url)
	if err != nil {
		fmt.Println(err)
		return &list
	}

	// NOTE(review): DownloadFilePower saves under the relative directory
	// "files/", so this absolute path only matches when the process runs from
	// /root/go-workpath/src/stocks. Confirm against the deployment.
	path := "/root/go-workpath/src/stocks/files/"
	source := path + "Powerplant_Talapai.yml"
	converted := path + "Powerplant_Talapai8.yml"

	// Re-encode the file as UTF-8.
	out, err := exec.Command("iconv", "-f", "cp1251", "-t", "utf8", source, "-o", converted).CombinedOutput()
	fmt.Printf("combined out:\n%s\n", string(out))
	if err != nil {
		fmt.Printf("cmd.Run() failed with %s\n", err)
		// Stop here: without a fresh conversion the output file is either
		// missing or left over from an earlier run.
		return &list
	}

	// Remove encoding="windows-1251" from the converted file so the XML
	// declaration no longer names a non-UTF-8 charset. Requires sed to be
	// installed.
	out, err = exec.Command("sed", "-i", "s/ encoding=\"windows-1251\"//", converted).CombinedOutput()
	fmt.Printf("combined out:\n%s\n", string(out))
	if err != nil {
		fmt.Printf("cmd.Run() failed with %s\n", err)
		return &list
	}

	// Open the converted file.
	resp, err := os.Open(converted)
	if err != nil {
		fmt.Println("Невозможно найти файл или открыть")
		return &list
	}
	defer resp.Close()

	body, err := ioutil.ReadAll(resp)
	if err != nil {
		return &list
	}

	// Drop the <offers> wrapper so its children unmarshal directly into list.
	newbody := strings.Replace(string(body), "<offers>", "", -1)
	newbody = strings.Replace(newbody, "</offers>", "", -1)

	if err = xml.Unmarshal([]byte(newbody), &list); err != nil {
		fmt.Println(err)
	}
	return &list
}
// DownloadFilePower fetches url over HTTP and stores the response body in
// files/<filepath>, relative to the current working directory.
//
// The request is issued before the destination file is created, and any
// status other than 200 OK is returned as an error. A failed request or an
// error response therefore neither truncates an existing file nor saves an
// error page in its place. It returns the first error from the request, from
// creating the file, from copying the body, or from closing the file.
func DownloadFilePower(filepath string, url string) error {
	// Get the data.
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("downloading %s: unexpected status %s", url, resp.Status)
	}

	// Create the file only once there is a body worth saving.
	out, err := os.Create("files/" + filepath)
	if err != nil {
		return err
	}

	// Write the body to the file.
	if _, err = io.Copy(out, resp.Body); err != nil {
		out.Close()
		return err
	}
	// Closed explicitly rather than deferred so that a failure to close the
	// written file is reported to the caller.
	return out.Close()
}
There is a Decoder.CharsetReader field for this:
// CharsetReader, if non-nil, defines a function to generate
// charset-conversion readers, converting from the provided
// non-UTF-8 charset into UTF-8. If CharsetReader is nil or
// returns an error, parsing stops with an error. One of the
// CharsetReader's result values must be non-nil.
CharsetReader func(charset string, input io.Reader) (io.Reader, error)
You can use it like this:
// Decode through an xml.Decoder so that a document declaring a non-UTF-8
// encoding is converted to UTF-8 while it is being parsed.
d := xml.NewDecoder(xmlFile)
d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
	// Encoding names in an XML declaration are case-insensitive, so normalize
	// before matching; "Windows-1251" and "WINDOWS-1251" are accepted too.
	switch strings.ToLower(charset) {
	case "windows-1251":
		return charmap.Windows1251.NewDecoder().Reader(input), nil
	default:
		return nil, fmt.Errorf("unknown charset: %s", charset)
	}
}
err := d.Decode(&zlList)
Didn't find what you were looking for?
Ask your question
731 491 924 answers to any question